Puppeteer - Accessing an IFrame within an IFrame - javascript

I am having an issue retrieving an IFrame within an IFrame, and I am not sure how to get the result I am looking for. I can retrieve the parent IFrame just fine; however, when I try to select the child frame it is not found, so the click method times out waiting for its selector. How would I go about solving this? I am quite new to Puppeteer.
// Get Email
// click, contentBuilderFrame and getEmail are defined outside this snippet.
await click(contentBuilderFrame, getEmail)
// Fixed 200 ms pause between interactions.
await page.waitFor(200);
await click(contentBuilderFrame, '#index-entry > div > div > div.active > div > div > div.contentproperties-header > div.contentproperties-buttons > div > div > div > a');
await page.waitFor(200);
// Look up the parent frame by a substring of its URL, starting from the page.
const container = await findFrame(page, "EmailAppContainer");
await page.waitFor(200);
// Will not find it
// NOTE: `container` is whatever findFrame returned (a frame or null), yet it is
// passed as findFrame's `page` argument, on which findFrame calls mainFrame().
const child = await findFrame(container, "cloud/tools/SSO.aspx?legacy=1&env=default&appId=6DF293A1-4FDC-41CD-AA08-89EFC793C33C&deepLink=%2f%23%2femails");
// Times out because the selector can't be found when the frame lookup fails
await click(child, '#sendflow-step-content')
findFrame Method:
/**
 * Finds a frame whose URL contains `urlPart` among the nested iframes that
 * getContentFrames collects below a starting point.
 *
 * @param {object} page - Either a Page (anything exposing `mainFrame()`) or a
 *   frame to search beneath, e.g. the result of an earlier findFrame call.
 * @param {string} urlPart - Substring to look for in each frame's URL.
 * @returns {Promise<object|null>} The last matching frame, or null if none match.
 * @throws {TypeError} If `page` is null or undefined.
 */
async function findFrame(page, urlPart) {
  if (page == null) {
    throw new TypeError(`findFrame: expected a Page or Frame, got ${page}`);
  }
  // A Page exposes mainFrame(); a frame does not. Using the frame itself as the
  // search root lets callers drill into an iframe inside an iframe.
  const rootFrame = typeof page.mainFrame === 'function' ? page.mainFrame() : page;
  const contentFrames = await getContentFrames(rootFrame);
  let frame = null;
  for (const contentFrame of contentFrames) {
    if (contentFrame.url().includes(urlPart)) {
      // Later matches overwrite earlier ones, so the last match is returned.
      frame = contentFrame;
    }
  }
  return frame;
}
getContentFrames:
/**
 * Collects every frame nested below `frame`, depth-first: each iframe's
 * content frame is followed immediately by that frame's own descendants.
 *
 * @param {object} frame - Object exposing `$$("iframe")` that resolves to
 *   handles with a `contentFrame()` method.
 * @returns {Promise<object[]>} Flat list of content frames (empty if none).
 */
async function getContentFrames(frame) {
  const collected = [];
  const iframeHandles = await frame.$$("iframe");
  for (let idx = 0; idx < iframeHandles.length; idx += 1) {
    const child = await iframeHandles[idx].contentFrame();
    if (child == null) {
      // Handle has no content frame; nothing to record or descend into.
      continue;
    }
    const descendants = await getContentFrames(child);
    collected.push(child, ...descendants);
  }
  return collected;
}
My understanding here is that I get all the frames with the getContentFrames method - I made sure that is the case by logging the output for these, but unsure how to proceed from there on out?

Related

Looping through multiple links properly

I am very new to puppeteer. I started yesterday and I'm trying to make a program that flips through a url that incrementally stores player id's one after the other and saves the player stats using neDB. There are thousands of links to flip through and I have found that if i use a for loop my computer basically crashes because 1,000 Chromiums try to open all at the same time. Is there a better way, or proper way to do this? Any advice would be appreciated.
const puppeteer = require('puppeteer');
const Datastore = require('nedb');
// Datastore that scrapeProduct inserts player records into.
// 'database.db' is presumably the file NeDB persists to — confirm against the NeDB docs.
const database = new Datastore('database.db');
// Load the datastore up front, before any insert is issued.
database.loadDatabase();
/**
 * Scrapes one player page and inserts the player's name and 12 stats into
 * `database`.
 *
 * Launches its own browser and always closes it, even when navigation or a
 * lookup fails.
 *
 * @param {string} url - Address of the player's ratings page.
 * @returns {Promise<void>}
 * @throws {Error} If an expected element is missing from the page, or if
 *   launching, navigating or reading the page fails.
 */
async function scrapeProduct(url) {
  // Stat names in the order their rows appear in the ratings table (rows 1-12).
  const statKeys = [
    'Athleticism',
    'Speed',
    'Durability',
    'Work_Ethic',
    'Stamina',
    'Strength',
    'Blocking',
    'Tackling',
    'Hands',
    'Game_Instinct',
    'Elusiveness',
    'Technique',
  ];

  // Reads the innerText of the first node matching an XPath expression.
  const readInnerText = async (page, xpath) => {
    const [node] = await page.$x(xpath);
    if (!node) {
      throw new Error(`No element matches XPath: ${xpath}`);
    }
    const property = await node.getProperty('innerText');
    return property.jsonValue();
  };

  const browser = await puppeteer.launch();
  try {
    const page = await browser.newPage();
    await page.goto(url);

    // XPath attribute tests use "@id"; "#id" is not valid XPath.
    const player = {
      Name: await readInnerText(page, '//*[@id="ctl00_ctl00_ctl00_Main_Main_name"]'),
    };
    for (const [index, key] of statKeys.entries()) {
      const row = index + 1; // table rows are 1-based in XPath
      player[key] = await readInnerText(
        page,
        `//*[@id="ctl00_ctl00_ctl00_Main_Main_SectionTabBox"]/div/div/div/div[1]/table/tbody/tr[${row}]/td[2]`
      );
    }
    database.insert(player);
  } finally {
    // Without this, every failed scrape would leave a Chromium process running.
    await browser.close();
  }
}
//For loop to loop through 1000 player links... Url.com is swapped in here because the actual url is ridiculously long and not important.
// NOTE: i runs from 0 to 1000 inclusive. scrapeProduct is async and is not
// awaited here, so each call is started without waiting for the previous one,
// and the log line below prints before that scrape has finished.
for(let i = 0; i <= 1000; i++){
let link = 'https://url.com/?id='+i+'&section=Ratings';
scrapeProduct(link);
console.log("Player #" + i + " scrapped");
}
The easiest tweak would be to wait for each link to finish before starting the next:
// Scrape the links one at a time: each scrape finishes before the next starts.
(async () => {
  const LAST_ID = 1000;
  for (let id = 0; id <= LAST_ID; id += 1) {
    const link = `https://url.com/?id=${id}&section=Ratings`;
    await scrapeProduct(link);
    console.log(`Player #${id} scrapped`);
  }
})();
You could also run several scrapes at once — as many as your computer can handle. This uses more resources, but lets the whole job finish faster. Figure out the limit you want, then do something like:
// Next player id to scrape; shared by every lane started with getNextLink.
let i = 0;

/**
 * Runs one lane of the scraping pool: scrapes the next unclaimed link, then
 * moves on to the following one until ids 0-1000 are exhausted.
 *
 * A failed scrape is passed to handleErrors and the lane then continues with
 * the next link, so one bad page does not shrink the pool.
 *
 * @returns {Promise<void>|undefined} Promise for the rest of this lane's work,
 *   or undefined once no links remain.
 */
const getNextLink = () => {
  if (i > 1000) return;
  const link = `https://url.com/?id=${i}&section=Ratings`;
  i++;
  return scrapeProduct(link)
    // Handle the failure first; chaining .then(getNextLink) before .catch would
    // skip the continuation whenever a scrape rejects.
    .catch(handleErrors)
    .then(getNextLink);
};
// Start the lanes; each one keeps pulling links through getNextLink.
const lanes = Array.from(
  { length: 4 }, // allow 4 to run concurrently
  getNextLink
);
Promise.all(lanes).then(() => {
  // all done
});
The above allows for 4 calls of scrapeProduct to be active at any one time - change the number as needed.
If you think that the issue with speed is reopening/closing the browser with each run, move browser to the global scope and initialize it to null. Then create a init function with something like:
/**
 * Launches the shared browser on first use; later calls do nothing.
 * Reads and assigns the outer-scope `browser` variable.
 *
 * @returns {Promise<void>}
 */
async function init() {
  if (browser) {
    return;
  }
  browser = await puppeteer.launch();
}
Allow pages to be passed to your scrapeProduct function. async function scrapeProduct(url) becomes async function scrapeProduct(url,page). Replace await browser.close() with await page.close(). Now your loop will look like this:
// Loop through the player links (ids 0-1000), scraping each in its own page of
// the shared browser. Url.com is swapped in here because the actual url is
// ridiculously long and not important.
await init();
const scrapes = [];
try {
  for (let i = 0; i <= 1000; i++) {
    const link = `https://url.com/?id=${i}&section=Ratings`;
    const page = await browser.newPage();
    // Keep each promise: the scrapes run concurrently, and the browser must
    // stay open until all of them have settled.
    scrapes.push(
      scrapeProduct(link, page).then(() => {
        console.log(`Player #${i} scrapped`);
      })
    );
  }
  await Promise.all(scrapes);
} finally {
  // Close only after the scrapes are done (or one of them has failed).
  await browser.close();
}
If you wanted to limit number of pages the browser will concurrently run you could create a function to do that:
/**
 * Counts the pages reported by the shared `browser`.
 *
 * @returns {Promise<number>} Length of the list resolved by `browser.pages()`.
 */
async function getTotalPages() {
  const { length } = await browser.pages();
  return length;
}
/**
 * Opens a new page in the shared `browser`, first waiting until fewer than
 * `maxPages` pages are open.
 *
 * The page count is checked immediately and then once per `pollIntervalMs`.
 * Any error from getTotalPages rejects the returned promise instead of being
 * lost inside a timer callback.
 *
 * @param {number} [maxPages=5] - Open-page count at which callers must wait.
 * @param {number} [pollIntervalMs=1000] - Delay between page-count checks.
 * @returns {Promise<object>} The page resolved by `browser.newPage()`.
 */
async function newPage(maxPages = 5, pollIntervalMs = 1000) {
  while ((await getTotalPages()) >= maxPages) {
    await new Promise((resolve) => {
      setTimeout(resolve, pollIntervalMs);
    });
  }
  return browser.newPage();
}
If you did this, in your loop you'd replace let page = await browser.newPage with let page = await newPage()

How to check if element exists inside async / await with Puppeteer

I am having trouble recording an element on a page after checking if it exists. The block of code I'm referring to is under the "// phone" comment.
This code loops through each section (section of sections) on the page and records "company" and "phone." "Phone" may not be present in some sections so I figured I'd pass it through an if statement to check if it exists. This creates an error = "Error: failed to find element matching selector ".mn-contact-phone"" How do I solve this?
// Question snippet: visits the listing page and logs company and phone for each section.
(async () => {
try {
const browser = await puppeteer.launch({ headless: true });
const page = await browser.newPage();
// loop through pages
// NOTE: pg is not used in the goto call below, so the same address is loaded on every pass.
for (let pg = 1; pg < 5; pg++) {
await page.goto("webpage");
// record number of sections
const sections = await page.$$("#mn-members-listings > div");
// loop through each section
for (const section of sections) {
// company
let company = await section.$eval(
"div.mn-searchlisting-title",
comp => comp.innerText
);
// phone --> THIS IF/ELSE THROWS AN ERROR
// NOTE: section.$() is not awaited, so .length is read from the returned
// promise rather than from an element.
if (section.$(".mn-contact-phone").length > 0) {
let phone = await section.$eval(".mn-contact-phone", phn => phn.innerText);
} else {
let phone = "";
}
// NOTE: both `phone` declarations are block-scoped to the if/else above, so
// `phone` is not in scope on this line.
console.log(`Company = ${company} | Phone = ${phone}`);
}
}
await browser.close();
} catch (error) {
// NOTE: when an error lands here, the browser.close() call above has been skipped.
console.log(`Our error is = ${error}`);
}
})();
From puppeteer docs:
The method runs document.querySelector within the page. If no element
matches the selector, the return value resolves to null.
1) null doesn't have length.
2) ElementHandle.$ returns a promise.
change the condition to:
if (await section.$(".mn-contact-phone"))
or if there are multiple elements:
if (await section.$$(".mn-contact-phone").length > 0)

Looping inside a page.evaluate in Puppeteer

I have a loop inside a page.evaluate method. The loop iterates a query selector which catches an innerText from multiple instances of a text element in a page.
I am receiving an error Evaluation Failed: Cannot read property of 'innerText'
I tried to loop outside of page.evaluate, but my iteration variable is not accessible from within the page.evaluate function.
// Here's a rough draft of what i'm trying to achieve:
// scrapeData is declared here, outside the callback handed to page.evaluate.
// NOTE(review): that callback presumably runs in the page, where this array would not exist — confirm.
const scrapeData = [];
const data = await page.evaluate(() => {
// Iteration to capture each target text in the page
for (var i = 1; i < 9; i++) {
// Select target text
// NOTE: the querySelector result is dereferenced without a null check, so a
// selector that matches nothing makes the .innerText access throw.
const serpDesc = document
.querySelector(
`#rso > div:nth-child(4) > div > div:nth-child(${i}) > div > div > div.s > div > span`
)
.innerText.trim();
// Build an array for the captured text
scrapeData[i] = serpDesc
// NOTE: this return sits inside the loop body, so the callback exits during
// the first iteration (i = 1) and later indices are never visited.
return {
serpDesc
};
};
});
My goal is to scrape some link descriptions(plain text) from a page into an array. Without the iteration code, everything works fine.
Try:
// Collect the text of every matching description in one pass. No loop index is
// defined for the callback, so the selector matches all result containers
// instead of interpolating an index into :nth-child().
const serpDesc = await page.evaluate(
  () => [...document.querySelectorAll('#rso > div:nth-child(4) > div > div > div > div > div.s > div > span')].map(elem => elem.innerText)
);
You will probably need to reconstruct your selector a bit, or maybe wrap serpDesc function in a for of or forEach loop.
You could also try something like
/**
 * Returns the innerText of the i-th element matching the selector, evaluated
 * through `page.evaluate`.
 *
 * @param {number} i - Zero-based index into the list of matching elements.
 * @returns {Promise<string|undefined>} The element's text, or undefined when
 *   fewer than i + 1 elements match.
 */
async function elSelector(i) {
  // The index is passed as an argument because the callback cannot see
  // variables from this scope.
  return page.evaluate((index) => {
    const matches = [...document.querySelectorAll('yourSelector')];
    const match = matches[index];
    return match ? match.innerText : undefined;
  }, i);
}
// Declare the index (an undeclared `i` is an implicit global) and await each
// call so the lookups run one after another and failures are not left floating.
for (let i = 0; i < 9; i++) {
  await elSelector(i);
}

Waiting for an iframe to be opened and scraped is too slow to scrape js

I'm trying to scrape an old website built with tr, br and iframe elements. Everything was going well until I needed to extract data from an iframe (see the iFrameScraping setTimeout below): the clicking happens too fast for me to get the data. Does anyone have an idea of how to click, wait for the content to show and be scraped, then continue?
// Question snippet: walks the first resultLength elements with class 'class',
// parses company details out of their innerHTML, then clicks each entry's link
// and reads the address from an iframe.
// NOTE: the callback never returns a value.
const newResult = await page.evaluate(async(resultLength) => {
const elements = document.getElementsByClassName('class');
// NOTE: `i` is assigned without a declaration.
for(i = 0; i < resultLength; i++) {
// The markup is split on <br>; index 0 is used for name/number, 1 for town, 2 for region.
const companyArray = elements[i].innerHTML.split('<br>');
let companyStreet,
companyPostalCode;
// Get company name
const memberNumber = elements[i].getElementsByTagName('a')[0].getAttribute('href').match(/[0-9]{1,5}/)[0];
// NOTE: the awaits on the lines below are applied to plain string operations.
const companyName = await companyArray[0].replace(/<a[^>]*><span[^>]*><\/span>/, '').replace(/<\/a>/, '');
const companyNumber = await companyArray[0].match(/[0-9]{6,8}/) ? companyArray[0].match(/[0-9]{6,8}/)[0] : '';
// Get town name
const companyTown = await companyArray[1].replace('"', '');
// Get region name
const companyRegion = await companyArray[2].replace(/<span[^>]*>Some text:<\/span>/, '');
// Get phone number
const telNumber = await elements[i].innerHTML.substring(elements[i].innerHTML.lastIndexOf('</span>')).replace('</span>', '').replace('<br>', '');
// NOTE: setTimeout returns a timer id, not a promise, so this await does not
// wait for the 2000 ms callback; the loop moves on before the callback runs.
const iFrameScraping = await setTimeout(async({elements, i}) => {
elements[i].getElementsByTagName('a')[0].click();
const iFrameContent = await document.getElementById('some-id').contentWindow.document.getElementById('lblAdresse').innerHTML.split('<br>');
companyStreet = iFrameContent[0].replace('"', '');
companyPostalCode = iFrameContent[2].replace('"', '');
}, 2000, {elements, i});
// NOTE: runs before the timer callback above has assigned these two variables.
console.log(companyStreet, companyPostalCode)
};
}, pageSearchResults.length);
I fixed my issues after a while, so I'll share my solution.
I had to stop collecting all the data in a loop inside evaluate, because it runs too fast and creates a race condition. Instead I used page.$$ together with a for…of loop. Note that forEach causes the same race condition, since it does not wait for async callbacks to finish before execution continues.
Here is the example from my updated code:
const companies = await page.$$('.repmbr_result_item');
const companiesLinks = await page.$$('.repmbr_result_item a');
for(company of companies) {
const companyEl = await page.evaluate(el => el.innerHTML, company)
const companyElArray = companyEl.split('<br>');

WebDriver in javascript: how to check if an element exists?

How can I check if an element exists in selenium?
I have tried:
browser.driver.findElements(by.id('my-id'))
but it does not seem to work.
Use isElementPresent
browser.driver.isElementPresent(by.id('my-id'))
or isPresent
element(by.id('my-id')).isPresent()
The problem is that looking for an element that does not exist throws an exception. You are on the right track with findElements, as it does not throw an error if it cannot find the element; what is missing is a check that the returned list contains at least one element (i.e. that something was actually found).
/**
 * Reports whether at least one element matches the given locator.
 *
 * @param by locator passed to driver.findElements
 * @return true if the returned list is non-empty, false otherwise
 */
public boolean exists(By by){
    int matchCount = driver.findElements(by).size();
    return matchCount > 0;
}
is a function that returns true if the element exists and false otherwise. It's clear from this method how you could modify it to suit your needs.
webdriver.js example
In this example I am waiting for a modal to exist because its a modal that fades in
See the line that reads
const doesModalFadeInExist = await this.driver.executeScript('return document.getElementsByClassName("modal fade in")');
You can put this line of code in a wait that waits for the element array to be >=1
This way you know it is there after checking inside the wait.
We can also check opacity at the same time.
Once the element array >=1 and opacity = '1' . then we exit the wait and continue the test
async waitForModalFadeIn() {
try {
let isVisible;
let opacityReturned;
let isModalFadeInFound;
const dialog = await this.driver.wait(until.elementLocated(this.baseselector.MODAL_FADE_IN));
await this.driver.wait(async () => {
const doesModalFadeInExist = await this.driver.executeScript('return document.getElementsByClassName("modal fade in")');
isModalFadeInFound = doesModalFadeInExist;
const bool = await this.isDisplayed(this.baseselector.MODAL_FADE_IN);
isVisible = bool;
const opacity = await dialog.getCssValue('opacity');
opacityReturned = opacity;
return isVisible === true && isModalFadeInFound.length > 0 && opacity === '1';
}, 4000);
return opacityReturned === '1' && isVisible === true && isModalFadeInFound.length > 0;
} catch (err) {
console.log(`Function waitForModalFadeIn: Failed to open modal${err}`);
return null;
}
}
Example Test
// Opens every "unlimited" modal in the competitor table and checks that each
// one fades in and can be closed again.
it('Trigger "unmetered" modals in the technical spec section', async () => {
  await wHost.visit(page = '/web-hosting');
  const locateTable = until.elementLocated(wHost.selector.COMPETITOR_TABLE);
  const competitorTable = await driver.wait(locateTable, this.pageLoadTimeOut);
  await driver.executeScript('arguments[0].scrollIntoView()', competitorTable);
  const modalTriggers = await competitorTable.findElements(wHost.selector.UNLIMITED_MODAL);
  for (const trigger of modalTriggers) {
    await trigger.click();
    assert.isTrue(await common.waitForModalFadeIn());
    assert.isTrue(await common.closeModal());
  }
});
This is what you are looking for, This function takes id as parameter,
If the element is found it returns json object with status=true and element you were looking for.
/**
 * Looks up an element by id and reports whether it was found.
 *
 * @param {string} element - The id passed to `webdriver.By.id`.
 * @returns {Promise<{status: boolean, element?: object}>} `{ element, status: true }`
 *   when the lookup succeeds; `{ status: false }` when it throws (the error is
 *   logged with `log.warn`).
 */
async function ElementExistsID(element) {
  // An async function already returns a promise; wrapping the body in
  // `new Promise(async ...)` would leave that promise pending forever if the
  // catch path itself threw.
  try {
    const ElementResult = await driver.findElement(webdriver.By.id(element));
    return { element: ElementResult, status: true };
  } catch (error) {
    log.warn(error);
    return { status: false };
  }
}
You can use it like this
// Look the element up, and stop with an error when it is missing.
let ElementIamLookingFor = await ElementExistsID(IDOfTheElement);
if (!ElementIamLookingFor.status) {
  throw new Error("Element not found");
}
console.log("Element exists");
After many attempts, the following worked for me:
/**
 * Runs a callback on the element with the given id when it is present, then
 * always calls `next`.
 *
 * @param {string} elementId - Id of the element to look for.
 * @param {Function} doIfPresent - Called with `element(locator)` when the
 *   element is present.
 * @param {Function} next - Called after the presence check, present or not.
 */
function doIfPresent(elementId, doIfPresent, next) {
  const locator = by.id(elementId);
  const onChecked = (isPresent) => {
    if (isPresent) {
      // Inside this function the name refers to the callback parameter.
      doIfPresent(element(locator));
    }
    next();
  };
  browser.driver.isElementPresent(locator).then(onChecked);
}
eg.
// Click the button if it is there; either way, carry on in the last callback.
doIfPresent(
  'my-button',
  (element) => {
    element.click();
  },
  () => {
    // your code continues here
  }
);
I don't get it though why we'd need to use futures here.

Categories

Resources