Hi I'm working on a script to learn node JS, and I'm stuck at this point :
I would like to read a text file with my email: password, and with that I would like to open as many tabs as many email:password I have and use each email: password to connect to the website
const puppeteer = require("puppeteer");
const lineReader = require("line-reader");
(async () => {
let data = [];
const promises = []
let int = 0;
lineReader.eachLine("c.txt", function(line) {
int++;
data = line.split(":")
console.log(data);
});
console.log(int);
for (let i = 0; i < 2; i++) {
const browser = await puppeteer.launch({
headless: false
});
const page = await browser.newPage();
const navigationPromise = page.waitForNavigation();
await page.goto("https:www.site.com/en/launch/");
await page.setViewport({ width: 1920, height: 1080 });
await page.waitForSelector(
".d-sm-h > .bg-white > .right-nav > .member-nav-item > .join-log-in"
);
await page.click(
".d-sm-h > .bg-white > .right-nav > .member-nav-item > .join-log-in"
);
const emailInput = '[placeholder="Adresse e-mail"][autocomplete="email"]';
await page.waitForSelector(emailInput, { timeout: 0 });
await page.focus(emailInput);
await page.keyboard.type(data[0]);
const passwordInput =
'[placeholder="Mot de passe"][autocomplete="current-password"]';
await page.waitForSelector(passwordInput, { timeout: 0 });
await page.focus(passwordInput);
await page.keyboard.type(data[1]);
await page.click(
".site-unite-submit-button.loginSubmit.site-unite-component"
);
}
await Promise.all(promises)
})();
but what I have when I consol log my data :
[ 'email#gmail.com:tesssst' ]
[ 'email2#gmail.com:tesssst' ]
I would like to know and understand how can I use each email:password that i have on my txt file to open tabs and use my data to login to the website
Thank you
Related
I'm having trouble signing in, when I open this page
https://www.nike.com.br/chuteira-nike-phantom-gt-elite-3d-unissex-153-169-171-316414?gridPosition=A1
It goes to another page, and I can't fill out the form, can someone help me fill out the login form?
(async () => {
try{
console.log("Started!")
const browser = await puppeteer.launch({
executablePathh:'/usr/bin/chromium', headless:false,
});
const page = await browser.newPage();
await page.setViewport({ width: 1920, height: 1080 });
await page.setRequestInterception(true);
const blockedResourceTypes = ["image", "bacon", "imageset", "font", "stylesheet", "texttrack", "csp_report"]
const blockedURLs = [
""
]
const allowedRequest = req => !blockedResourceTypes.includes(req.resourceType()) && !blockedURLs.includes(req.url())
page.on('request', (req) => {
if(allowedRequest(req)) {
req.continue();
}
else {
req.abort();
}
});
await page.goto('https://www.nike.com.br/chuteira-nike-phantom-gt-elite-3d-unissex-153-169-171-316414?gridPosition=A1', {
waitUntil: 'domcontentloaded', timeout:0});
await page.waitForXPath('//label[#for="tamanho__idM40F395"]', {
visible:true, timeout:0});
const tamanho = await page.$x('//label[#for="tamanho__idM40F395"]')
await tamanho[0].click('//label[#for="tamanho__idM40F395"]');
await page.waitForSelector('button#anchor-acessar-unite-oauth2') await page.click('button#anchor-acessar-unite-oauth2')
const iframe = wait.ForSelector('iframe#nike-unite-oauth2-iframe')
const frame = await iframe.contentFrame()
await frame.type('input[name="emailAddress"]', 'test#gmail.com')
await frame.type('input[name="password"]', 'pass')
await frame.click('input[value="ENTRAR"]')
They realized that if you run the script it will go to the login page and then I can't fill out the form
To login in a new page instead of an iframe, try to replace this:
await page.click('button#anchor-acessar-unite-oauth2')
const iframe = wait.ForSelector('iframe#nike-unite-oauth2-iframe')
const frame = await iframe.contentFrame()
await frame.type('input[name="emailAddress"]', 'test#gmail.com')
await frame.type('input[name="password"]', 'pass')
await frame.click('input[value="ENTRAR"]')
with this:
await Promise.all([
page.click('button#anchor-acessar-unite-oauth2'),
page.waitForNavigation(),
])
await page.waitForSelector('input[name="emailAddress"]')
await page.type('input[name="emailAddress"]', 'test#gmail.com')
await page.type('input[name="password"]', 'pass')
await page.click('input[value="ENTRAR"]')
I was trying to scrape a thumbnail image from youtube with its XPath but I am getting undefined for the src. I can't figure out what is causing this? I already tried using both the XPath and full XPath but that didn't help. Any help is appreciated. Thanks in advance.
const puppeteer = require('puppeteer');
async function scrapeChannel1(url) {
const browser = await puppeteer.launch();
const page = await browser.newPage();
await page.goto(url, {
timeout: 0
});
const [el2] = await page.$x('//*[#id="dismissible"]/ytd-thumbnail');
const src1 = await el2.getProperty('src');
const thumbnailURL1 = await src1.jsonValue();
browser.close();
console.log({
thumbnailURL1
})
return {
thumbnailURL1
}
}
scrapeChannel1('https://www.youtube.com/')
The <img> you are looking for is placed a bit deeper in the DOM at: '//*[#id="dismissible"]/ytd-thumbnail/a/yt-img-shadow/img' (so you should add: /a/yt-img-shadow/img at the end of your XPath expression).
Note, you have more powerful tools in puppeteer than .getProperty('src') to retrieve DOM element properties.
E.g. page.$eval:
const selector = 'ytd-thumbnail > a > yt-img-shadow > #img'
const imageSrc = await page.$eval(selector, el => el.src)
// returns: https://i.ytimg.com/vi/{youtube_id}/hqdefault.jpg...
Or if you want all images use page.$$eval:
const imageSrcs = await page.$$eval(selector, elems => elems.map(el => el.src))
If you want to get images src from YouTube, you need to scroll video thumbnails into view like in the code below (also check it on the online IDE):
const puppeteer = require("puppeteer-extra");
const StealthPlugin = require("puppeteer-extra-plugin-stealth");
puppeteer.use(StealthPlugin());
const mainPageUrl = "https://www.youtube.com";
async function scrollPage(page, scrollElements) {
let currentElement = 0;
while (true) {
let elementsLength = await page.evaluate((scrollElements) => {
return document.querySelectorAll(scrollElements).length;
}, scrollElements);
for (; currentElement < elementsLength; currentElement++) {
await page.waitForTimeout(200);
await page.evaluate(
(currentElement, scrollElements) => {
document.querySelectorAll(scrollElements)[currentElement].scrollIntoView();
},
currentElement,
scrollElements
);
}
await page.waitForTimeout(5000);
let newElementsLength = await page.evaluate((scrollElements) => {
return document.querySelectorAll(scrollElements).length;
}, scrollElements);
if (newElementsLength === elementsLength || currentElement > 100) break; // if you want to get all elements (or some other number of elements) change number to 'Infinity' (or some other number)
}
}
async function getThumbnails() {
const browser = await puppeteer.launch({
headless: false,
args: ["--no-sandbox", "--disable-setuid-sandbox"],
});
const page = await browser.newPage();
await page.setDefaultNavigationTimeout(60000);
await page.goto(mainPageUrl);
await page.waitForSelector("#contents");
const scrollElements = "a#thumbnail";
await scrollPage(page, scrollElements);
await page.waitForTimeout(10000);
const urls = await page.$$eval("a#thumbnail #img", (els) => els.map(el => el.getAttribute('src')).filter(el => el));
await browser.close();
return urls;
}
getThumbnails().then(console.log);
Output
[
"https://i.ytimg.com/vi/02oeySm1CJA/hq720.jpg?sqp=-oaymwEcCNAFEJQDSFXyq4qpAw4IARUAAIhCGAFwAcABBg==&rs=AOn4CLBmrYMHESpY_f1oTNx00iuR3tNeCQ",
"https://i.ytimg.com/vi/RMo2haIPYBM/hq720_live.jpg?sqp=CNifxJcG-oaymwEcCNAFEJQDSFXyq4qpAw4IARUAAIhCGAFwAcABBg==&rs=AOn4CLBw4ogzR0709SqbttRdEzfL-aTdgQ",
"https://i.ytimg.com/vi/qJFFp_ta1Zk/hqdefault.jpg?sqp=-oaymwEcCOADEI4CSFXyq4qpAw4IARUAAIhCGAFwAcABBg==&rs=AOn4CLBJ-44OFgBUuVUYWBVh3Yi3hQgwIg",
"https://i.ytimg.com/vi/OZoTjoN-Sn0/hqdefault.jpg?sqp=-oaymwEcCOADEI4CSFXyq4qpAw4IARUAAIhCGAFwAcABBg==&rs=AOn4CLCOeGTCnlT4U0wV1SNclkmFUEHLaA",
"https://i.ytimg.com/vi/L8cH2gI67uk/hqdefault.jpg?sqp=-oaymwEcCOADEI4CSFXyq4qpAw4IARUAAIhCGAFwAcABBg==&rs=AOn4CLAuvZ3khIjpvAVTGjmR9FDxQrPIgQ",
"https://i.ytimg.com/vi/6rUyVKyJnGY/hq720.jpg?sqp=-oaymwEcCNAFEJQDSFXyq4qpAw4IARUAAIhCGAFwAcABBg==&rs=AOn4CLCifsTG4MlA3mf8CcJDkfKdWaZkaA",
"https://i.ytimg.com/vi/xpaURivPZFk/hq720_2.jpg?sqp=-oaymwEdCJYDENAFSFXyq4qpAw8IARUAAIhCcAHAAQbQAQE=&rs=AOn4CLA5oFDDsVzbV3tUqyfogfuf3LPahQ",
"https://i.ytimg.com/vi/MsR76PyVdUs/hq720_2.jpg?sqp=-oaymwEdCJYDENAFSFXyq4qpAw8IARUAAIhCcAHAAQbQAQE=&rs=AOn4CLAEBYGNvif-7LWx2mqW4G9o-OUhEQ",
"https://i.ytimg.com/vi/liasQRRVt5w/hq720_2.jpg?sqp=-oaymwEdCJYDENAFSFXyq4qpAw8IARUAAIhCcAHAAQbQAQE=&rs=AOn4CLAUcMpyKY0GhmNAHHtP_cDkAp18DQ",
"https://i.ytimg.com/vi/Dr5IqlTLMDM/hq720_2.jpg?sqp=-oaymwEdCJYDENAFSFXyq4qpAw8IARUAAIhCcAHAAQbQAQE=&rs=AOn4CLBOSUi6mgjdD5a-Jx8Ns24SlexB1g",
"https://i.ytimg.com/vi/E8kit8xJKdI/hq720_2.jpg?sqp=-oaymwEdCJYDENAFSFXyq4qpAw8IARUAAIhCcAHAAQbQAQE=&rs=AOn4CLDDStn95G7ei5DTusGXE4RimzdLUw",
"https://i.ytimg.com/vi/SqEaahOmLHU/hq720_2.jpg?sqp=-oaymwEdCM0CENAFSFXyq4qpAw8IARUAAIhCcAHAAQbQAQE=&rs=AOn4CLBDcWLCklNxEAuT1ZvSTKrIplGOag",
...and other results
]
You can read more about scraping YouTube search from my blog post Web scraping YouTube search video results with Nodejs.
So, I am trying to scrape a couple of searchengines with a couple of search phrases with Playwright.
Running the script with one query is working.
Working:
const { chromium } = require('playwright');
(async () => {
const browser = await chromium.launch({ headless: false, slowMo: 250 });
const context = await browser.newContext()
const page = await context.newPage();
const keyWord = ('Arsenal');
await page.goto('https://duckduckgo.com/');
await page.fill('//input[#name="q"]',keyWord);
await page.keyboard.press('Enter');
const getOne = (' (//h2[#class="result__title"])[9] ');
await page.waitForSelector(getOne)
const pushOne = await page.$(getOne);
const One = await pushOne.evaluate(element => element.innerText);
console.log(One);
await page.goto('https://yandex.com/');
await page.fill('//input[#aria-label="Request"]', keyWord);
await page.keyboard.press('Enter');
const getTwo = (' //li[#data-first-snippet] //div[#class="organic__url-text"] ');
await page.waitForSelector(getTwo)
const pushTwo = await page.$(getTwo);
const Two = await pushTwo.evaluate(element => element.innerText);
console.log(Two);
await browser.close()
})()
But when I use an array with phrases (keyWordlist) I fail to get the script running.
Have searched around for using Array with 'For' and 'Foreach' loops, but haven't been able to fix it.
I want to run the different keywords through the different searchengines and list the results.
For 3 keywords in two searchengines that would get 6 results.
const { chromium } = require('playwright');
(async () => {
const browser = await chromium.launch({ headless: false, slowMo: 250 });
const context = await browser.newContext()
const page = await context.newPage();
let kewWordlist = ['Arsenal', 'Liverpool', 'Ajax']
for (var i=0; i<=kewWordlist.length; i++) {
// for (const i in kewWordlist){
async () => {
const keyWord = kewWordlist[i];
await page.goto('https://duckduckgo.com/');
await page.fill('//input[#name="q"]',keyWord);
// await page.fill('//input[#name="q"]',[i]);
// await page.fill('//input[#name="q"]',`${keyWord}`);
await page.keyboard.press('Enter');
const getOne = (' (//h2[#class="result__title"])[9] ');
await page.waitForSelector(getOne)
const pushOne = await page.$(getOne);
const One = await pushOne.evaluate(element => element.innerText);
console.log(One);
// await page.goto('https://yandex.com/');
// await page.fill('//input[#aria-label="Request"]', keyWord);
// await page.keyboard.press('Enter');
// const getTwo = (' //li[#data-first-snippet] //div[#class="organic__url-text"] ');
// await page.waitForSelector(getTwo)
// const pushTwo = await page.$(getTwo);
// const Two = await pushTwo.evaluate(element => element.innerText);
// console.log(Two);
}}
await browser.close()
})()
If anyone has some pointers on how to solve this, much obliged.
maybe the result selectors needs some tweaking but I think this is what you were looking for:
test.only('search search engines', async({page, context}) => {
const search = [
{
name: 'yandex',
url: 'https://yandex.com/',
elementFill: '//input[#aria-label="Request"]',
elementResult: '//li[#data-first-snippet] //div[#class="organic__url-text"]'
},
{
name: 'google',
url: 'https://www.google.nl',
elementFill: '//input[#name="q"]',
elementResult: '(//h2[#class="result__title"])[9]'
},
{
name: '',
url: 'https://duckduckgo.com/',
elementFill: '//input[#name="q"]',
elementResult: '(//h2[#class="result__title"])[9]'
}
]
const kewWordlist = ['Arsenal', 'Liverpool', 'Ajax']
for (let i = 0; i < search.length; i++) {
const searchName = search[i].name
const searchResult = search[i].elementResult
const searchFill = search[i].elementFill
const searchPage = await context.newPage()
await searchPage.waitForLoadState()
await searchPage.goto(`${search[i].url}`)
for (let i = 0; i < kewWordlist.length; i++) {
await searchPage.fill(searchFill,kewWordlist[i])
await searchPage.keyboard.press('Enter')
await searchPage.waitForSelector(searchResult)
const result = await page.$(searchResult)
console.log(`${searchName}: ${result} `)
}
}
})
The reason your loop isn't working is that you have an async function inside of it that you never call. There are a few ways you could go about this:
You could take your first version, have it accept a word to search, and run that over each element of the array:
const searchOneKeyword = async (keyWord) => {
const browser = await chromium.launch({ headless: false, slowMo: 250 });
const context = await browser.newContext()
const page = await context.newPage();
// rest of code
}
const kewWordList = ['Arsenal', 'Liverpool', 'Ajax']
keyWordList.forEach((k) => {
searchOneKeyword(k)
})
Or if you'd like to keep the same browser instance, you can do it in a loop in the function:
const search = async (words) => {
const browser = await chromium.launch({ headless: false, slowMo: 250 });
const context = await browser.newContext()
const page = await context.newPage();
for (const keyWord of words) {
await page.goto('https://duckduckgo.com/');
await page.fill('//input[#name="q"]',keyWord);
await page.keyboard.press('Enter');
const getOne = (' (//h2[#class="result__title"])[9] ');
await page.waitForSelector(getOne)
const pushOne = await page.$(getOne);
const One = await pushOne.evaluate(element => element.innerText);
console.log(One);
// etc.
}
await browser.close()
}
search(keyWordList)
In both of those cases, you're logging, but never returning anything, so if you need that data in another function afterwards, you'd have to change that. Example:
const search = async (words) => {
const browser = await chromium.launch({ headless: false, slowMo: 250 });
const context = await browser.newContext()
const page = await context.newPage();
const results = await Promise.all(words.map((keyWord) => {
await page.goto('https://duckduckgo.com/');
await page.fill('//input[#name="q"]',keyWord);
await page.keyboard.press('Enter');
// etc.
return [ One, Two ]
}))
await browser.close()
return results
}
search(keyWordList).then((results) => { console.log(results.flat()) })
I have spent a couple of hours trying to get the script working based on your suggestions. No result unfortunately. I get errors like 'await is only valid in async function' and 'Unreachable code detected'. Searched for other examples, for some inspiration, but none found. If you or someone else has a suggestion, please share! This is code I have now:
const { chromium } = require('playwright');
let keyWordList = ['Arsenal', 'Liverpool', 'Ajax']
const search = async function words() {
const browser = await chromium.launch({ headless: false, slowMo: 250 });
const context = await browser.newContext()
const page = await context.newPage();
}
const results = await Promise.all(words.map(keyWord))
//DUCKDUCKGO
await page.goto('https://duckduckgo.com/');
await page.fill('//input[#name="q"]',keyWord);
await page.keyboard.press('Enter');
const getOne = (' (//h2[#class="result__title"])[9] ');
await page.waitForSelector(getOne)
const pushOne = await page.$(getOne);
const One = await pushOne.evaluate(element => element.innerText);
//YANDEX
await page.goto('https://yandex.com/');
await page.fill('//input[#aria-label="Request"]', keyWord);
await page.keyboard.press('Enter');
const getTwo = (' //li[#data-first-snippet] //div[#class="organic__url-text"] ');
await page.waitForSelector(getTwo)
const pushTwo = await page.$(getTwo);
const Two = await pushTwo.evaluate(element => element.innerText);
console.log(Two);
return [ One , Two ]
return results
search(keyWordList).then((results) => { console.log(results.flat())
await browser.close();
})
I'm new to puppeteer so dont know much about it. This is my code so far and everything works.
But I want it to click the login button on the page after it has put the text in the fields, but I cannot figure out how to do it for the life of me. I've tried many different things and none work. Any help with this would be awesome.
just incase you need it
https://server.nitrado.net/usa/rent-gameserver
(async () => {
console.log('launch browser');
const browser = await pup.launch({headless: false});
console.log('new page');
const page = await browser.newPage();
console.log('goto');
await page.setViewport({ width: 1920, height: 1080 });
await page.goto('https://server.nitrado.net/usa/rent-gameserver', { waitUntil: "networkidle2", timeout: 60000 });
await page.waitFor(5000);
console.log('extract login iframe');
var iframes = await page.frames();
var loginFrame = iframes.find(f => f.url().indexOf("oauth.nitrado.net") > -1);
await page.waitFor(5000);
console.log('evaluate iframe');
await loginFrame.evaluate(() => {
document.getElementById('username').value = 'test';
document.getElementById('password').value = '12345';
});
await page.waitFor(300000);
console.log('done');
await browser.close();
})()```
I tried a workaround entering the frame url, I'm not sure if this will help, but here goes the code (main.js):
const pup = require('puppeteer');
mainFunc = async function () {
return new Promise(async (resolve, reject) => { //Wrap de promise
var browser;
try {
//Wrap de tratamento de erros
const browser = await pup.launch({ headless: false });
const page = await browser.newPage();
//SELECTORS:
var userInputSel = '#username';
var passInputSel = '#password';
var loginBtnSel = '#auth_login_ws_header > form > button';
var myUser = "myusername"; //PUT YOUR USERNAME HERE!!!
var myPass = "MyPaSsWoRd123"; //PUT YOUR PASSWORD HERE!!!
await page.goto('https://server.nitrado.net/usa/rent-gameserver',
{ waitUntil: "networkidle2", timeout: 60000 });
await page.waitFor(5000);
console.log('extract login iframe');
var iframes = await page.frames();
var loginFrame = iframes.find(f => f.url().indexOf("oauth.nitrado.net") > -1);
console.log(loginFrame.url())
await page.goto(loginFrame.url(),
{ waitUntil: "networkidle2", timeout: 60000 });
await page.waitFor(5000);
console.log('evaluate iframe');
/*await loginFrame.evaluate(() => {
document.getElementById('username').value = 'test';
document.getElementById('password').value = '12345';
});*/
await page.waitForSelector(userInputSel);
await page.type(userInputSel, myUser);
await page.waitForSelector(passInputSel);
await page.type(passInputSel, myPass);
await page.waitForSelector(loginBtnSel);
await page.click(loginBtnSel);
await page.waitFor(300000);
console.log('done');
await browser.close();
} catch (e) {
if(browser!=undefined){
browser.close();//Close browser if error
}
return reject(e);
}
});//Wrap de promise
}
mainFunc();
This is a running version for you to test. Just type "node main". (of course you need puppeteer installed (npm i puppeteer))...
Every time I run this script is times-out.
Does setDefaultNavigationTimeout actually prevent time outs?
There is about 26 URLs I'm going through, and each page has a large amount of images. Can't imagine Puppeteer can't handle these pages just because of heavy images?
const url = 'test.com';
const jsonReturn = [];
async function runScraper() {
const browser = await puppeteer.launch(prodConfig);
const page = await browser.newPage({
timeout: 0
});
page.setDefaultNavigationTimeout(0);
await page.goto(url, { waitUntil: 'domcontentloaded' });
await page.waitForSelector('.featured-shows-featured-show');
let featuredShowsURLs = await page.$$eval('.featured-shows-featured-show > a', (links) => {
return links.map(link => {
return link.href;
});
});
featuredShowsURLs = _.uniq(featuredShowsURLs)
for (const featuredShowsURL of featuredShowsURLs) {
const page = await browser.newPage({
timeout: 0
});
try {
await page.goto(featuredShowsURL);
await page.waitForSelector('.show-title');
} catch (e) {
featuredShowsURL;
debugger;
}
const showTitle = await findAndReturnSelectorText('.show-title', page);
const showDates = await findAndReturnSelectorText('.show-dates', page);
const showLocation = await findAndReturnSelectorText('.show-location', page);
const showGallery = await findAndReturnSelectorText('.entity-link', page);
const showDetail = await findAndReturnSelectorText('.show-press-release', page);
const newItem = {
showTitle,
showDates,
showLocation,
showGallery,
showDetail,
};
const id = hash(newItem);
jsonReturn.push({
...newItem,
id
});
}
await browser.close();
}
runScraper();