I am new to Puppeteer and I can't understand why the following code doesn't work. Any explanation would be appreciated.
const puppeteer=require("puppeteer");
// Question snippet, kept as asked: tries to show an alert once the page has loaded.
(async () => {
// Launch a visible (non-headless) browser and open a new tab.
const browser = await puppeteer.launch({headless:false});
const page = await browser.newPage();
// Navigate to the target site before anything is evaluated in the page.
await page.goto('https://bet254.com');
// Register a "load" listener from inside the page context.
// NOTE(review): this runs only after goto() has resolved, so the page's load
// event may already have fired by the time the listener is attached.
await page.evaluate(async ()=> {
window.addEventListener("load",(event)=>{
// NOTE(review): alert is called on `document` here, not on `window` — confirm it exists there.
document.alert("Loaded!");
})
});
})();
I was expecting an alert after loading. But nothing happened! How can I add a listener to show an alert on page load?
page.goto already waits for the page to load, so by the time your evaluate runs, you can't re-wait for the page to load, and the load event will never fire.
Another problem is that document.alert isn't a function. You may be thinking of document.write or window.alert. In any case, neither function is particularly useful for debugging, so I suggest sticking to console.log unless you have a very compelling reason not to.
When working with Puppeteer, it's important to isolate problems by running your evaluate code by hand in the browser without Puppeteer, otherwise you might have no idea whether it's Puppeteer or the browser code that's failing.
Anything logged in evaluate won't be shown in your Node stdout or stderr, so you'll probably want to monitor that with a log listener. You'll need to look at both Node and the browser console for errors.
Depending on what you're trying to accomplish, page.evaluateOnNewDocument(pageFunction[, ...args]) will let you attach code to evaluate whenever you navigate, which might be what you're trying for here.
Here's an example of alerting headfully:
const puppeteer = require("puppeteer"); // ^19.6.3
// Kept outside the async function so the cleanup step can reach the browser
// even when a later step throws.
let browser;

/**
 * Opens example.com in a visible browser and raises a native alert from
 * inside the page when its "load" event fires.
 */
const run = async () => {
  browser = await puppeteer.launch({headless: false});
  const tabs = await browser.pages();
  const page = tabs[0];

  // Installed before navigation so the listener exists when the document loads.
  const alertOnLoad = () => {
    window.addEventListener("load", () => {
      alert("Loaded!");
    });
  };
  await page.evaluateOnNewDocument(alertOnLoad);

  await page.goto("https://www.example.com", {waitUntil: "load"});
};

run()
  .catch(error => console.error(error))
  .finally(() => browser?.close());
Using console.log headlessly, with the log redirected to Node:
const puppeteer = require("puppeteer");
/**
 * Forwards a browser console message to Node's console.
 * Every argument handle of the message is resolved to its JSON value and
 * the resolved values are printed with a single console.log call.
 *
 * @param msg - console message exposing `args()`, whose items expose `jsonValue()`.
 * @returns a promise that settles once the values have been logged.
 */
const onPageConsole = msg => {
  const pendingValues = msg.args().map(handle => handle.jsonValue());
  return Promise.all(pendingValues).then(values => console.log(...values));
};
// Kept outside the async function so the cleanup step can reach the browser
// even when a later step throws.
let browser;

/**
 * Opens example.com headlessly and logs "Loaded!" from inside the page on
 * its "load" event; page console output is routed through onPageConsole.
 */
const run = async () => {
  browser = await puppeteer.launch();
  const tabs = await browser.pages();
  const page = tabs[0];

  // Attach the console forwarder first so no page log is missed.
  page.on("console", onPageConsole);

  // Installed before navigation so the listener exists when the document loads.
  const logOnLoad = () => {
    window.addEventListener("load", () => {
      console.log("Loaded!");
    });
  };
  await page.evaluateOnNewDocument(logOnLoad);

  await page.goto("https://www.example.com", {waitUntil: "load"});
};

run()
  .catch(error => console.error(error))
  .finally(() => browser?.close());
If all you're trying to do is run some code in the browser after load, then you might not even need to attach a listener to the load event at all:
/**
 * Forwards a browser console message to Node's console.
 * Every argument handle of the message is resolved to its JSON value and
 * the resolved values are printed with a single console.log call.
 *
 * @param msg - console message exposing `args()`, whose items expose `jsonValue()`.
 * @returns a promise that settles once the values have been logged.
 */
const onPageConsole = msg => {
  const pendingValues = msg.args().map(handle => handle.jsonValue());
  return Promise.all(pendingValues).then(values => console.log(...values));
};
// Fix: this example used `puppeteer` without requiring it, so it could not
// run on its own the way the other complete examples do.
const puppeteer = require("puppeteer");

// Kept outside the async function so .finally() can close the browser even
// when a step inside throws.
let browser;
(async () => {
  browser = await puppeteer.launch();
  const [page] = await browser.pages();
  // Attach before evaluating so the page's console output reaches Node.
  page.on("console", onPageConsole);
  await page.goto("https://www.example.com", {waitUntil: "load"});
  // goto() above already waited for "load", so no load listener is needed here.
  await page.evaluate(() => console.log("Loaded!"));
})()
  .catch(err => console.error(err))
  .finally(() => browser?.close());
Or if the code you want to run is purely Node just use normal control flow:
// Fix: this example used `puppeteer` without requiring it, so it could not
// run on its own the way the other complete examples do.
const puppeteer = require("puppeteer");

// Kept outside the async function so .finally() can close the browser even
// when a step inside throws.
let browser;
(async () => {
  browser = await puppeteer.launch();
  const [page] = await browser.pages();
  await page.goto("https://www.example.com", {waitUntil: "load"});
  // Plain Node code: it simply runs once goto() has resolved.
  console.log("Loaded!");
})()
  .catch(err => console.error(err))
  .finally(() => browser?.close());
By the way, there's no need to make a function async unless you have await in it somewhere.
See also Puppeteer wait until page is completely loaded.
Related
I'm trying to get my Puppeteer script to log in with my Gmail address on Zalando. I'm using the id of the field so it can type my Gmail address into it, but it just doesn't want to. Can you help me?
This is where the id, class etc is:
<input type="email" class="cDRR43 WOeOAB _0Qm8W1 _7Cm1F9 FxZV-M bsVOrE
mo6ZnF dUMFv9 K82if3 LyRfpJ pVrzNP NN8L-8 QGmTh2 Vn-7c-"
id="login.email" data-testid="email_input" name="login.email" value=""
placeholder="E-postadress" autocomplete="email">
This is my code:
const puppeteer = require('puppeteer');
// Question snippet, kept as asked.
// Product page to open first, and the cart page visited afterwards.
const product_url = "https://www.zalando.se/nike-sportswear-air-flight-lite-mid-hoega-sneakers- whiteblack-ni112n02z-a11.html"
const cart = "https://www.zalando.se/cart"
// Launches a visible browser and returns a new page from it.
// NOTE(review): only the page is returned; the browser handle is not stored
// anywhere, so nothing in this script can close it later.
async function givePage(){
const browser = await puppeteer.launch({headless: false})
const page = await browser.newPage();
return page;
}
// Drives the shop flow on `page`: open the product, click the buy button,
// accept cookies, pick a size, open the cart, then click the checkout button.
async function addToCart(page){
// going to website
await page.goto(product_url)
// clicking "handla"
await page.waitForSelector("button[class='DJxzzA u9KIT8 uEg2FS U_OhzR ZkIJC- Vn-7c- FCIprz heWLCX JIgPn9 LyRfpJ pxpHHp Md_Vex NN8L-8 GTG2H9 MfX1a0 WCjo-q EKabf7 aX2-iv r9BRio mo6ZnF PLvOOB']");
// NOTE(review): a callback is passed as the second argument of page.click here
// and in the calls below — confirm what page.click expects in that position.
await page.click("button[class='DJxzzA u9KIT8 uEg2FS U_OhzR ZkIJC- Vn-7c- FCIprz heWLCX JIgPn9 LyRfpJ pxpHHp Md_Vex NN8L-8 GTG2H9 MfX1a0 WCjo-q EKabf7 aX2-iv r9BRio mo6ZnF PLvOOB']", elem => elem.click());
// clicking "OK" to cookies
await page.waitForSelector("button[class='uc-btn uc-btn-primary']");
await page.click("button[class='uc-btn uc-btn-primary']", elem => elem.click());
// clicking "size EU 41"
// Clicks the fifth element (index 4) matching this class list, inside the page.
await page.evaluate(() => document.getElementsByClassName('_6G4BGa _0Qm8W1 _7Cm1F9 FxZV-M IvnZ13 Pb4Ja8 ibou8b JT3_zV ZkIJC- Md_Vex JCuRr_ na6fBM _0xLoFW FCIprz pVrzNP KRmOLG NuVH8Q')[4].click());
console.log("körs")
// Schedules a browser-side log after one second. The page function returns
// without waiting for the timer, so this does not pause the script.
await page.evaluate(async() => { setTimeout(function(){ console.log('waiting'); }, 1000);});
// going to "cart"
await page.goto(cart)
// clicking "gå till checkout"
await page.waitForSelector("button[class='z-1-button z-coast-base-primary-accessible z-coast-base__sticky-sumary__cart__button-checkout z-1-button--primary z-1-button--button']");
await page.click("button[class='z-1-button z-coast-base-primary-accessible z-coast-base__sticky-sumary__cart__button-checkout z-1-button--primary z-1-button--button']", elem => elem.click());
}
// Types the e-mail address into the login field on `page`.
// NOTE(review): nothing here waits for the input to exist before typing.
async function Login(page){
// Same non-blocking timer as in addToCart: it logs in the browser after a second.
await page.evaluate(async() => { setTimeout(function(){ console.log('waiting'); }, 1000);});
await page.type("input[id='login.email']", 'david.exartor#gmail.com');
}
// Entry point: creates the page, runs the cart flow, then the login step.
async function checkout(){
var page = await givePage();
await addToCart(page);
await Login(page);
}
// The returned promise is not awaited or caught here.
checkout();
I've tried using the other things such as the name, class and testid but still no success. I was expecting that something would work but nothing did.
You're missing waiting for that input selector:
// Wait for the e-mail field to appear, then type into the returned element handle.
const emailInput = await page.waitForSelector("[id='login.email']");
await emailInput.type('david.exartor#gmail.com');
Suggestions/notes:
This code:
await page.click("button[class='uc-btn uc-btn-primary']", elem => elem.click());
can just be:
await page.click("button[class='uc-btn uc-btn-primary']");
The second argument is supposed to be an options object, not a callback. If you want to trigger a native click, use:
await page.$eval("button[class='uc-btn uc-btn-primary']", el => el.click());
When I run into trouble automating a login, I often add a userDataDir and pop open a browser session so I can log in to the site manually.
Try to avoid sleeping. It slows down your script and can lead to random failures. Pick tighter predicates like waitForSelector or waitForFunction and encode the exact condition you're waiting on.
Luckily, your attempts at sleeping don't actually do much of anything:
await page.evaluate(async() => { setTimeout(function(){ console.log('waiting'); }, 1000);});
This just logs to the browser console after a second but doesn't block in Puppeteer. The async keyword isn't necessary. To actually sleep in the browser, you could do:
await page.evaluate(() => new Promise(r => setTimeout(r, 1000)));
or just sleep in Node:
await new Promise(r => setTimeout(r, 1000));
If you run console.log(await page.content()) headlessly, you'll see the site is detecting you as a bot and not returning the login page. The canonical is Why does headless need to be false for Puppeteer to work? if you plan to run headlessly in the future.
The givePage function leaks a browser handle, hanging the process. Better to write your script without abstractions until you have everything working, then factor out abstractions. My usual boilerplate is something like:
const puppeteer = require("puppeteer");
/**
 * Placeholder for the actual scraping logic: navigates `page` to the example
 * site, waiting only for DOMContentLoaded, and prints the document title.
 *
 * @param page - Puppeteer page (anything exposing `goto` and `title`).
 */
const scrape = async page => {
  // write your code here
  const target = "https://www.example.com";
  await page.goto(target, {waitUntil: "domcontentloaded"});
  const title = await page.title();
  console.log(title);
};
// Kept outside the async function so the cleanup step can reach the browser
// even when a later step throws.
let browser;

/** Launches a headless browser and hands its first tab to scrape(). */
const run = async () => {
  browser = await puppeteer.launch();
  const tabs = await browser.pages();
  await scrape(tabs[0]);
};

run()
  .catch(error => console.error(error))
  .finally(() => browser?.close());
Be extremely careful with your [class="foo bar baz"] selectors. These are rigid and overly-precise relative to the preferred .foo.bar.baz version. The former is an exact match, so if another class shows up or the order of the classes change, your script will break. Here's an example of the problem:
const puppeteer = require("puppeteer"); // ^19.0.0
// Minimal document: one paragraph whose class attribute is exactly "foo bar".
const html = `<p class="foo bar">OK</p>`;

// Selectors in the order they are tried; the trailing comment on each line is
// the line the script prints for it.
const selectors = [
  // Good:
  ".foo.bar", // => OK
  ".bar.foo", // => OK
  ".foo", // => OK
  ".bar", // => OK

  // Works but verbose:
  '[class~="foo"][class~="bar"]', // => OK

  // Works but brittle:
  '[class="foo bar"]', // => OK

  // Special cases that are sometimes necessary:
  '[class^="foo "]', // => OK
  '[class$=" bar"]', // => OK
  '[class*="fo"]', // => OK

  // Fails:
  '[class="foo"]', // => FAIL
  '[class="bar"]', // => FAIL
  '[class="bar foo"]', // => FAIL
];

let browser;

/** Loads the sample markup and prints, per selector, the matched text or "FAIL". */
const run = async () => {
  browser = await puppeteer.launch();
  const tabs = await browser.pages();
  const page = tabs[0];
  await page.setContent(html);

  // Text of the first match, or "FAIL" whenever $eval rejects.
  const textOf = async selector => {
    try {
      return await page.$eval(selector, node => node.textContent);
    } catch {
      return "FAIL";
    }
  };

  for (const selector of selectors) {
    console.log(await textOf(selector));
  }
};

run()
  .catch(error => console.error(error))
  .finally(() => browser?.close());
The [attr=""] selector is suitable in uncommon situations when you need to test semantics like "begins with", "ends with", "substring" or in a very rare case where you actually need to distinguish between class="foo bar" and class="bar foo", which I've never had to do before.
Be careful with overly-specific selectors like .foo.bar.baz.quux.garply.corge. If you can distinguish that element with a simple .foo or a #baz .foo, just use that in most circumstances. Related: overusing browser-generated selectors and Convenient way to get input for puppeteer page.click().
Block images and extra resources to speed up your script once you get the basic functionality working.
In the following code snippet, I try to click a button (after some Timeout) within the page.evaluate function. It does not work. Yet, when I open the console in the launched browser and manually type const btn = document.querySelectorAll("form button")[1]; btn.click() it does.
Can anyone explain to me the cause of this difference in behavior and how to fix it?
Here's a minimal reproducible example:
import { resolve } from 'path';
import puppeteer from 'puppeteer'
// Question snippet. Go to the page and handle the cookie request.
// Fix: the original paste ended with a stray `})`, which made the whole
// snippet a syntax error; it is removed and the code is laid out one
// statement per line. The logic is otherwise unchanged.
const browser = await puppeteer.launch({
  defaultViewport: {width: 1920, height: 1080},
  headless: false,
  args: ['--start-maximized'],
});
const page = await browser.newPage();
const url = "https://de.finance.yahoo.com/";
await page.goto(url);

// Click the fifth child (index 4) of the "div.actions" container.
await page.waitForSelector("div.actions");
await page.evaluate(() => {
  let z = document.querySelector("div.actions");
  z.children[4].click();
});

// Set the search box value, then click the second "form button" after a
// one-second delay; the returned promise keeps evaluate() pending until then.
await page.waitForSelector("input[id=yfin-usr-qry]");
await page.evaluate(() => {
  let z = document.querySelector("input[id=yfin-usr-qry]");
  z.value = "AAPL";
  const btn = document.querySelectorAll("form button")[1];
  return new Promise((resolve) => setTimeout(() => {
    btn.click();
    resolve();
  }, 1000));
});
The form button selector appears to be incorrect, selecting a non-visible element with class .modules_clearBtn__uUU5h.modules_noDisplay__Qnbur. I'd suggest selecting by .finsrch-btn or #UH-0-UH-0-Header .finsrch-btn if you have to select this, but it's not really necessary, so I won't use it in my suggested solution below.
Beyond that, I'd tighten up some of the selectors, skip the timeout and prefer using trusted Puppeteer events when possible.
I'm not sure what data you want on the final page but this should give you a screenshot of it, showing all of the content:
const puppeteer = require("puppeteer"); // ^18.0.4
// Kept outside the async function so the cleanup step can reach the browser
// even when a later step throws.
let browser;

/**
 * Accepts the consent prompt on de.finance.yahoo.com, searches for AAPL via
 * the keyboard, waits for the quote page content and saves a full-page
 * screenshot to aapl.png.
 */
const run = async () => {
  browser = await puppeteer.launch();
  const tabs = await browser.pages();
  const page = tabs[0];

  // Shorthand: resolve to the element handle once the selector is present.
  const waitFor = (...args) => page.waitForSelector(...args);

  await page.goto("https://de.finance.yahoo.com/", {waitUntil: "domcontentloaded"});

  const agreeButton = await waitFor('button[name="agree"]');
  await agreeButton.click();

  const searchBox = await waitFor("#yfin-usr-qry");
  await searchBox.type("AAPL");
  await page.keyboard.press("Enter");

  // Wait for the quote page, scroll to the bottom, then wait for the
  // recommendations element before capturing.
  await waitFor("#AAPL-interactive-2col-qsp-m");
  await page.evaluate("scrollTo(0, document.body.scrollHeight)");
  await waitFor("#recommendations-by-symbol");

  await page.screenshot({path: "aapl.png", fullPage: true});
};

run()
  .catch(error => console.error(error))
  .finally(() => browser?.close());
That said, rather than navigating to the homepage, typing in a search, then pressing a button, you could consider building the URL directly, e.g. https://de.finance.yahoo.com/quote/${symbol} and navigating right to it. This is generally faster, more reliable, and easier to code.
I'm using Puppeteer and jsDOM to scrape this site: https://www.lcfc.com/matches/results.
I want the names of the teams of every match, so on the console I use this:
document.querySelectorAll('.match-item__team-container span')
.forEach(element => console.log(element.textContent));
On the console, the names print fine, but when I use this in my code it returns nothing.
This is my code:
const puppeteer = require('puppeteer');
const jsdom = require('jsdom');
// Question snippet, kept as asked: fetches the results page with Puppeteer,
// then parses the response text with jsdom and logs the team-name spans.
(async () => {
try {
const browser = await puppeteer.launch() ;
const page = await browser.newPage();
const response = await page.goto('https://www.lcfc.com/matches/results');
// Raw body of the navigation response, as text.
const body = await response.text();
// A separate jsdom document is built from that text; the query below runs
// against it, not against the page open in the browser.
// NOTE(review): presumably the team names are added by page scripts and are
// absent from this raw HTML — confirm.
const { window: { document } } = new jsdom.JSDOM(body);
document.querySelectorAll('.match-item__team-container span')
.forEach(element => console.log(element.textContent));
// Only reached when nothing above throws.
await browser.close();
} catch (error) {
console.error(error);
}
})();
And I don't have any error. Some suggestion? Thank you.
I tried with this code now, but still not working. I show the code and a picture of the console:
const puppeteer = require('puppeteer');
// Question snippet, kept as asked: second attempt using page.evaluate.
(async () => {
try {
const browser = await puppeteer.launch() ;
const page = await browser.newPage();
// NOTE(review): no page.goto() call appears before this wait, so the new page
// is never navigated to the site — confirm.
await page.waitForSelector('.match-item__team-container span');
// The page function only logs; it returns nothing, so `data` is undefined.
const data = await page.evaluate(() => {
document.querySelectorAll('.match-item__team-container span')
.forEach(element => console.log(element.textContent));
});
//listen to console events in the chrome tab and log it in nodejs process
// NOTE(review): attached after evaluate() has already completed.
page.on('console', consoleObj => console.log(consoleObj.text()));
// Only reached when nothing above throws.
await browser.close();
} catch (error) {
console.log(error);
}
})();
Do it the Puppeteer way: use evaluate to run your code after waiting for the selector to appear via waitForSelector.
// Listen to console events in the Chrome tab and log them in the Node.js process.
// Fix: the listener is now registered BEFORE evaluate(); attached afterwards,
// it misses everything the page function logs.
page.on('console', consoleObj => console.log(consoleObj.text()));

await page.waitForSelector('.match-item__team-container span');

// Fix: return the collected texts instead of the undefined placeholder
// `somevalue`, which would throw a ReferenceError inside the page.
// `data` is the array of team names, transferred back to Node.
const data = await page.evaluate(() => {
  const spans = document.querySelectorAll('.match-item__team-container span');
  const names = Array.from(spans, element => element.textContent);
  names.forEach(name => console.log(name));
  return names;
});
evaluate runs your code inside the active tab of the chrome so you will not need jsDOM to parse the response.
UPDATE
The new timeout issue is because the page is taking too long to load: raise the timeout (for example to 60000 ms, or use {timeout: 0} to disable it entirely):
// Fix: page.evaluate() has no timeout option — anything after the page
// function is passed to it as an argument — so the limit is set on the call
// that actually waits. 60000 ms here; {timeout: 0} disables the limit.
await page.waitForSelector('.match-item__team-container span', {timeout: 60000});

// Fix: return the collected texts instead of the undefined placeholder
// `somevalue`, which would throw a ReferenceError inside the page.
const data = await page.evaluate(() => {
  const spans = document.querySelectorAll('.match-item__team-container span');
  const names = Array.from(spans, element => element.textContent);
  names.forEach(name => console.log(name));
  return names;
});
I am trying to wait for a popup to load completely before proceeding, but I am not sure how to accomplish this. Currently I am using await page.waitFor(3000);. Is there a more elegant way to wait for the popup to fully load and then proceed?
below is my relevant part of the code.
// Question snippet, kept as asked: clicks the export controls inside the page,
// pauses, then treats the last open tab as the popup.
// NOTE(review): `$` is resolved inside the page here; presumably the site
// provides it (e.g. jQuery) — confirm.
await page.evaluate(async () => {
await $('#myDataExport').click();
await $('.export-btn a').click();
},);
// Fixed 3000 ms pause instead of waiting for the popup itself.
await page.waitFor(3000);
// Take the last entry of the browser's page list as the export popup.
const browserPages = await browser.pages();
const exportPopup = browserPages[browserPages.length - 1];
I have also tried to use the below
// Question snippet, kept as asked.
// Each array entry is awaited inline, so the click finishes before
// waitForNavigation() is even called; Promise.all only receives the two
// already-resolved results.
await Promise.all([
await page.click('.export-btn a'),
await page.waitForNavigation({ waitUntil: 'networkidle2' }),
]);
But I get an error Error: Node is either not visible or not an HTMLElement
Any help in this would be really great, Thanks.
I tried to make a working example. You can just ignore the request interception code.
const puppeteer = require('puppeteer')
// Opens a page, clicks a link that opens a popup, waits for the popup's
// content, saves a screenshot of it and closes everything.
;(async () => {
  const browser = await puppeteer.launch({headless: false})
  // Fix: try/finally closes the browser even when a step throws; previously
  // a failure left it open.
  try {
    const [page] = await browser.pages()

    // Optional request interception, because of the many ads on the page.
    // You can remove this if you like, as this is just an example.
    // page.setRequestInterception(true)
    // page.on('request', request => {
    //   if (request.url().startsWith('https://www.w3schools.com/')) {
    //     request.continue()
    //   } else {
    //     request.abort()
    //   }
    // })

    await page.goto('https://www.w3schools.com/tags/att_a_target.asp', {waitUntil: 'domcontentloaded'})

    // The lines commented out below are just the w3schools example:
    // await page.waitForSelector('a[target="_blank"].w3-btn.w3-margin-bottom')
    // ... page.click('a[target="_blank"].w3-btn.w3-margin-bottom') in the Promise.all below

    // Your code should look like this.
    // Fix: wait for the link BEFORE clicking. Previously waitForSelector and
    // click were both started inside Promise.all, so the click could run
    // before the element existed.
    await page.waitForSelector('.export-btn a')

    // Start listening for the popup before the click so the event cannot be
    // missed; `once` removes the listener after the first popup.
    const [popup] = await Promise.all([
      new Promise(resolve => page.once('popup', resolve)),
      page.click('.export-btn a'),
    ])

    await popup.waitForSelector('#iframeResult')
    await popup.screenshot({path: 'targetpopup.png'})
    await popup.close()
  } finally {
    await browser.close()
  }
})()
Have you tried: browser.once with targetcreated target domain event?
Calling target.page() connects Puppeteer to the tab and generates a Page object.
New tabs aren't opened immediately on click. A way to await events is to create a new promise. [source]
Example:
// Fix: the closing parenthesis of `new Promise(...)` was missing, which made
// this snippet a syntax error.
// The listener is set up before the click so the new tab's creation is not missed.
const newPagePromise = new Promise(resolve =>
  browser.once('targetcreated', target => resolve(target.page()))
);
await page.click('.export-btn a');
const newPage = await newPagePromise;
I finally figured out how to use Node.js and installed all the libraries/extensions. Puppeteer is working, but, as previously with XMLHttpRequest, it gets only the template/body of the page, without the needed information. All the scripts on the page run a few seconds after it has been opened in the browser (a web app?). I need to get the information inside certain tags after the whole page is loaded. Also, I would ask whether it is possible to use pure JavaScript, because I do not use jQuery-like code, so it doubles the difficulty for me...
Here what I have so far.
const puppeteer = require('puppeteer');
const $ = require('cheerio');
// Question snippet, kept as asked: loads a URL, takes page.content() and
// prints the text of every <strong> element found by cheerio.
// These two module-level variables are never assigned below; the callbacks'
// own `browser` / `page` parameters shadow them.
let browser;
let page;
const url = "really long link with latitude and attitude";
(async () => puppeteer
.launch()
// `await` is applied to the function expression itself (not to a promise),
// so it just yields the function that is then passed to .then().
.then(await function(browser) {
return browser.newPage();
})
.then(await function(page) {
// Navigate, then resolve with the page's HTML at that moment.
return page.goto(url).then(function() {
return page.content();
});
})
.then(await function(html) {
// Parse the HTML string with cheerio and log each <strong> element's text.
$('strong', html).each(function() {
console.log($(this).text());
});
})
// Any error from the chain ends up here and is discarded without logging.
.catch(function(err) {
//handle error
}))();
I get only template default body elements inside strong tag. But it should contain a lot more data than just 10 items.
If you want full html same as inspect? Here it is:
const puppeteer = require('puppeteer');
/**
 * Loads example.org, waits for the network to go idle and prints the outer
 * HTML of the first element matched by '*'.
 */
(async function main() {
  // Declared outside try so the finally block can close it.
  let browser;
  try {
    browser = await puppeteer.launch();
    const [page] = await browser.pages();

    await page.goto('https://example.org/', { waitUntil: 'networkidle0' });
    const data = await page.evaluate(() => document.querySelector('*').outerHTML);

    console.log(data);
  } catch (err) {
    console.error(err);
  } finally {
    // Fix: close() used to sit inside try, so any earlier error skipped it
    // and left the browser running.
    await browser?.close();
  }
})();
let bodyHTML = await page.evaluate(() => document.documentElement.outerHTML);
This
Some notes:
You do not need cheerio with Puppeteer, and you do not need to reparse page.content(): you already have the full DOM with all scripts run, and you can evaluate any code in the window context, as in a browser, using page.evaluate(), transferring serializable data between the web API context and the Node.js API context.
Try to use async/await only, this will simplify your code and flow.
If you need to wait till all the scripts and other dependencies are loaded, use waitUntil: 'networkidle0' in page.goto().
If you suspect that document scripts need some time till the needed state, use various test functions like page.waitForSelector() or fall back to page.waitFor(milliseconds).
Here is a simple script that outputs all tag names in a page.
'use strict';
const puppeteer = require('puppeteer');
/**
 * Loads example.org, waits for the network to go idle and prints the tag
 * name of every element in the document.
 */
(async function main() {
  // Declared outside try so the finally block can close it.
  let browser;
  try {
    browser = await puppeteer.launch();
    const [page] = await browser.pages();

    await page.goto('https://example.org/', { waitUntil: 'networkidle0' });

    // Runs in the page; only the plain array of strings is sent back to Node.
    const data = await page.evaluate(
      () => Array.from(document.querySelectorAll('*'))
        .map(elem => elem.tagName)
    );

    console.log(data);
  } catch (err) {
    console.error(err);
  } finally {
    // Fix: close() used to sit inside try, so any earlier error skipped it
    // and left the browser running.
    await browser?.close();
  }
})();
You can specify your task in more details and we can try to write something more appropriate.
Script for www.bezrealitky.cz (task from a comment below):
'use strict';
const fs = require('fs');
const puppeteer = require('puppeteer');
/**
 * Opens a bezrealitky.cz search, keeps clicking the "load more" button until
 * it is gone, then saves the full page, the results container and the
 * articles' text to full.html, articles.html and articles.txt.
 */
(async function main() {
  // Fix: the URL literal was broken across two lines (a syntax error) and its
  // `&center=` parameter had been mis-decoded to `¢er=`. It is rebuilt here
  // from its query parameters, with every other character unchanged.
  const searchUrl = [
    'https://www.bezrealitky.cz/vyhledat?offerType=pronajem&estateType=byt&disposition=&ownership=&construction=&equipped=&balcony=&order=timeOrder_desc',
    '&boundary=%5B%5B%7B%22lat%22%3A50.171436864513%2C%22lng%22%3A14.506905276796942%7D%2C%7B%22lat%22%3A50.154133576294%2C%22lng%22%3A14.599004629591036%7D%2C%7B%22lat%22%3A50.14524430128%2C%22lng%22%3A14.58773054712799%7D%2C%7B%22lat%22%3A50.129307131988%2C%22lng%22%3A14.60087568578706%7D%2C%7B%22lat%22%3A50.122604734575%2C%22lng%22%3A14.659116306376973%7D%2C%7B%22lat%22%3A50.106512499343%2C%22lng%22%3A14.657434650206028%7D%2C%7B%22lat%22%3A50.090685542974%2C%22lng%22%3A14.705099547441932%7D%2C%7B%22lat%22%3A50.072175921973%2C%22lng%22%3A14.700004206235008%7D%2C%7B%22lat%22%3A50.056898491904%2C%22lng%22%3A14.640206899053055%7D%2C%7B%22lat%22%3A50.038528576841%2C%22lng%22%3A14.666852728301023%7D%2C%7B%22lat%22%3A50.030955909657%2C%22lng%22%3A14.656128752460972%7D%2C%7B%22lat%22%3A50.013435368522%2C%22lng%22%3A14.66854956530301%7D%2C%7B%22lat%22%3A49.99444182116%2C%22lng%22%3A14.640153080292066%7D%2C%7B%22lat%22%3A50.010839032542%2C%22lng%22%3A14.527474219359988%7D%2C%7B%22lat%22%3A49.970771602447%2C%22lng%22%3A14.46224174052395%7D%2C%7B%22lat%22%3A49.970669964027%2C%22lng%22%3A14.400648545303966%7D%2C%7B%22lat%22%3A49.941901176098%2C%22lng%22%3A14.395563234671044%7D%2C%7B%22lat%22%3A49.948384148423%2C%22lng%22%3A14.337635637038034%7D%2C%7B%22lat%22%3A49.958376114735%2C%22lng%22%3A14.324977842107955%7D%2C%7B%22lat%22%3A49.9676286223%2C%22lng%22%3A14.34491711110104%7D%2C%7B%22lat%22%3A49.971859099005%2C%22lng%22%3A14.326815050839059%7D%2C%7B%22lat%22%3A49.990608728081%2C%22lng%22%3A14.342731259186962%7D%2C%7B%22lat%22%3A50.002211140429%2C%22lng%22%3A14.29483886971002%7D%2C%7B%22lat%22%3A50.023596577558%2C%22lng%22%3A14.315872285282012%7D%2C%7B%22lat%22%3A50.058309376419%2C%22lng%22%3A14.248086830069042%7D%2C%7B%22lat%22%3A50.073179111%2C%22lng%22%3A14.290193274400963%7D%2C%7B%22lat%22%3A50.102973823639%2C%22lng%22%3A14.224439442359994%7D%2C%7B%22lat%22%3A50.130060800171%2C%22lng%22%3A14.302396419107936%7D%2C%7B%22lat%22%3A50.116019827009%2C%22lng%22%3A14.360785349547996%7D%2C%7B%22lat%22%3A50.148005694843%2C%22lng%22%3A14.365662825877052%7D%2C%7B%22lat%22%3A50.14142969454%2C%22lng%22%3A14.394903042943952%7D%2C%7B%22lat%22%3A50.171436864513%2C%22lng%22%3A14.506905276796942%7D%2C%7B%22lat%22%3A50.171436864513%2C%22lng%22%3A14.506905276796942%7D%5D%5D',
    '&hasDrawnBoundary=1',
    '&mapBounds=%5B%5B%7B%22lat%22%3A50.289447077141126%2C%22lng%22%3A14.68724263943227%7D%2C%7B%22lat%22%3A50.289447077141126%2C%22lng%22%3A14.087801111111958%7D%2C%7B%22lat%22%3A50.039169221047985%2C%22lng%22%3A14.087801111111958%7D%2C%7B%22lat%22%3A50.039169221047985%2C%22lng%22%3A14.68724263943227%7D%2C%7B%22lat%22%3A50.289447077141126%2C%22lng%22%3A14.68724263943227%7D%5D%5D',
    '&center=%7B%22lat%22%3A50.16447196305031%2C%22lng%22%3A14.387521875272125%7D',
    '&zoom=11&locationInput=praha&limit=15',
  ].join('');

  // Declared outside try so the finally block can close it.
  let browser;
  try {
    browser = await puppeteer.launch();
    const [page] = await browser.pages();
    // 0 disables Puppeteer's default timeouts for this page.
    page.setDefaultTimeout(0);

    await page.goto(searchUrl);
    await page.waitForSelector('#search-content button.btn-icon');

    // Keep loading more results for as long as the button is still in the DOM.
    while (await page.$('#search-content button.btn-icon') !== null) {
      const articlesForNow = (await page.$$('#search-content article')).length;
      console.log(`Articles for now: ${articlesForNow}. Getting more...`);
      // Click and, concurrently, wait until the article count has grown past
      // the count taken before the click.
      await Promise.all([
        page.evaluate(
          () => { document.querySelector('#search-content button.btn-icon').click(); }
        ),
        page.waitForFunction(
          old => document.querySelectorAll('#search-content article').length > old,
          {},
          articlesForNow
        ),
      ]);
    }

    const articlesAll = (await page.$$('#search-content article')).length;
    console.log(`All articles: ${articlesAll}.`);

    fs.writeFileSync('full.html', await page.content());

    fs.writeFileSync('articles.html', await page.evaluate(
      () => document.querySelector('#search-content div.b-filter__inner').outerHTML
    ));

    // One text block per article, separated by a line of 50 dashes.
    fs.writeFileSync('articles.txt', await page.evaluate(
      () => [...document.querySelectorAll('#search-content article')]
        .map(({ innerText }) => innerText)
        .join(`\n${'-'.repeat(50)}\n`)
    ));

    console.log('Saved.');
  } catch (err) {
    console.error(err);
  } finally {
    // Fix: close() used to sit inside try, so any earlier error skipped it
    // and left the browser running.
    await browser?.close();
  }
})();
Just one line:
const html = await page.content();
Details:
import puppeteer from 'puppeteer'
/**
 * Opens `url` in a visible browser, waits until the network is idle and
 * prints the page's full HTML.
 *
 * @param {string} url - address to load.
 * @returns {Promise<void>} settles after the HTML is printed and the browser is closed.
 */
const test = async (url) => {
  const browser = await puppeteer.launch({ headless: false })
  // Fix: the browser was never closed, which leaked it and kept the Node
  // process alive; it is now closed whether or not a step throws.
  try {
    const page = await browser.newPage()
    await page.goto(url, { waitUntil: 'networkidle0' })
    const html = await page.content()
    console.log(html)
  } finally {
    await browser.close()
  }
}
await test('https://stackoverflow.com/')