how to get dynamic id by using selenium and JavaScript - javascript

I want to get all the ids from this page using JavaScript.
const webdriver = require('selenium-webdriver');
const {Build, By, Key, until} = webdriver;
// Opens the NBA odds page in Chrome, tries to collect ids from it and prints
// the result. This is the asker's failing attempt, kept exactly as posted.
(async function getDiv () {
let driver = new webdriver.Builder().forBrowser('chrome').build();
try {
await driver.get('https://www.scoresandodds.com/nba');
// Fixed one-second pause after navigation before the page is queried.
await driver.sleep(1000);
// NOTE(review): getElementsByName is a DOM `document` method; presumably the
// WebDriver object has no such method, which would explain the reported error.
// The answer code further down uses `await driver.findElements(By.xpath(...))`
// and reads each element's `id` attribute instead — confirm against the
// selenium-webdriver API docs.
var ids = driver.getElementsByName('id')
console.log("----");
//for (let elm of (await div)) {
// console.log(`${elm}`); }
console.log(`${ids}`);
}
catch(error) {
// Any failure above is logged, not rethrown.
console.error(error);
}
finally {
// Always end the browser session, including after an error.
if (driver) {
await driver.quit();
}
}
})();
It throws an error. Could you explain why I cannot get them?
Thank you.

I got this.
let webdriver = require('selenium-webdriver');
let chrome = require('selenium-webdriver/chrome');
let By = webdriver.By;

/**
 * Opens the NBA odds page in Chrome and prints the `id` attribute of every
 * element whose `class` attribute is exactly `event-card`.
 */
(async () => {
  // NOTE(review): the driver path is kept exactly as posted; a backslash may
  // have been lost in transcription (e.g. "C:\\chromedriver.exe") — confirm.
  const driver = await new webdriver.Builder()
    .withCapabilities(webdriver.Capabilities.chrome())
    .setChromeService(new chrome.ServiceBuilder("C:chromedriver.exe"))
    .build();
  try {
    await driver.get("https://www.scoresandodds.com/nba");
    // XPath addresses attributes with `@`; `#class` is not valid XPath.
    const elements = await driver.findElements(By.xpath("//*[@class='event-card']"));
    // Read all ids concurrently with the native Promise.all; the trailing
    // no-op `.then()` of the original is dropped.
    const vals = await Promise.all(elements.map((element) => element.getAttribute('id')));
    vals.forEach((val) => console.log(val));
  } finally {
    // Close the browser even when navigation or the lookup fails.
    await driver.quit();
  }
})();

Related

I can't get puppeteer-core to use the search parameters in the url to get a full list of items from a website by changing the "pageSize" parameter

I am trying to get a list of items from a website with puppeteer-core.
Here is the code that should print 774 in the console but only returns 24.
const puppeteer = require('puppeteer-core');
const jsdom = require("jsdom");
const { JSDOM } = jsdom;
/**
 * Loads the auction catalog page (requested with pageSize=1000) in a headless
 * browser, re-parses the rendered HTML with JSDOM and walks down through
 * nested <div> elements, logging how many <div>s the innermost grid contains.
 */
async function test() {
const browser = await puppeteer.launch();
const page = await browser.newPage();
await page.setJavaScriptEnabled(true)
// NOTE(review): presumably 0 disables the navigation timeout — confirm in the Puppeteer docs.
await page.setDefaultNavigationTimeout(0);
await page.goto("https://www.liveauctioneers.com/catalog/256884_8-27-22-cameo-cut-glass-art-glass-and-more/?page=1&pageSize=1000", {waitUntil: "networkidle2"});
// Serialize the rendered page and parse it again outside the browser.
let pageContent = await page.content()
let dom = new JSDOM(pageContent)
let div = dom.window.document.querySelectorAll("div")
// forEach returns undefined, so this `await` does not wait for anything.
// Note that `dom` and `div` are reassigned inside the callbacks while the
// outer lists are still being iterated; each level re-parses innerHTML.
await div.forEach(element => {
// Level 1: the element with id "content".
if (element.id == "content") {
dom = new JSDOM(element.innerHTML)
div = dom.window.document.querySelectorAll("div")
div.forEach(element => {
// Level 2: the app wrapper, matched by its exact class string.
// NOTE(review): these class names look build-generated and may change — confirm.
if (element.className == "App__StyledApp-sc-eiwfgw-0 cHSpyq") {
dom = new JSDOM(element.innerHTML)
div = dom.window.document.querySelectorAll("div")
div.forEach(element => {
// Level 3: the element with id "main", then its first <div>.
if (element.id == "main") {
dom = new JSDOM(element.innerHTML)
div = dom.window.document.querySelectorAll("div")
dom = new JSDOM(div[0].innerHTML)
div = dom.window.document.querySelectorAll("div")
div.forEach(element => {
// Level 4: the catalog items container, then fixed positional hops
// (second <div>, then first <div>) down to the grid.
if (element.className == "CatalogPageItems__StyledContainer-sc-y0p083-0 bLuQEb") {
dom = new JSDOM(element.innerHTML)
div = dom.window.document.querySelectorAll("div")
dom = new JSDOM(div[1].innerHTML)
div = dom.window.document.querySelectorAll("div")
dom = new JSDOM(div[0].innerHTML)
div = dom.window.document.querySelectorAll("div")
// Counts every descendant <div> of the final fragment, not only direct children.
console.log(div.length)
}
});
}
});
}
});
}
})
await browser.close();
}
// Kick off the scrape; the returned promise is not awaited or caught here.
test()
For me this code returns 24 instead of 774. If I load the url "https://www.liveauctioneers.com/catalog/256884_8-27-22-cameo-cut-glass-art-glass-and-more/" into my browser the pageSize defaults to 24.
I tried to simplify the code to find the number of items. I looked only at how many children the grid has and ended up with this one simple line that gives me the correct result:
...
// Take the rendered HTML from the Puppeteer page and parse it with JSDOM.
let pageContent = await page.content();
let dom = new JSDOM(pageContent);
// The item count is the number of direct element children of the grid container.
// NOTE(review): the class name looks build-generated and may change — confirm.
let count = dom.window.document.querySelector(".Grid__ItemGridContainer-sc-1fy9g29-0").childElementCount;
console.log(count);
await browser.close();
...

How do I remove a specific custom conditional format from all ranges that use it? I am using the delete() method, but that doesn't work

When I apply the conditional formats, I store it in the conditionalFormatCols and conditionalFormatRows variables. I then use the delete() method on these variables, but this doesn't delete the conditional formatting on the cells. How do I delete conditional formatting so it no longer shows up in the workbook?
// Shared state for the navigation aid, written by the functions below.
// Handle returned by sheet.onSelectionChanged.add(); used later to remove the handler.
let eventResult;
// True while the navigation aid is switched on.
let navAidIsOn = false;
// Most recently created custom conditional format for the highlighted columns.
let conditionalFormatCols;
// Most recently created custom conditional format for the highlighted rows.
let conditionalFormatRows;
/**
 * Toggles the navigation aid.
 *
 * When the aid is off: highlights the current selection, subscribes
 * `navAidListener` to selection changes on the active worksheet and marks the
 * aid as on. When the aid is already on: delegates to `turnNavAidOff()`.
 *
 * @returns {Promise<void>} Settles once the highlighting or the switch-off has finished.
 */
async function navAid() {
  await Excel.run(async (context) => {
    if (navAidIsOn) {
      // turnNavAidOff() resets navAidIsOn itself.
      await turnNavAidOff();
      return;
    }
    // Awaited so that a formatting failure rejects this call instead of
    // becoming an unhandled rejection, and so the flag only flips after the
    // highlight has actually been applied.
    await applyConditionalFormatting();
    const sheet = context.workbook.worksheets.getActiveWorksheet();
    eventResult = sheet.onSelectionChanged.add(navAidListener);
    navAidIsOn = true;
  });
}
/**
 * Switches the navigation aid off: removes the selection-changed handler,
 * requests deletion of the remembered column/row conditional formats, syncs,
 * and clears the `navAidIsOn` flag.
 *
 * Runs in the request context that registered the event handler
 * (`eventResult.context`).
 *
 * @returns {Promise<void>} Settles after the batch has been synced.
 */
async function turnNavAidOff() {
  await Excel.run(eventResult.context, async (context) => {
    eventResult.remove();
    // Here I try to use the delete() method
    // NOTE(review): these two objects are created inside the separate
    // Excel.run() batch of applyConditionalFormatting(), and only the most
    // recently created pair is remembered; presumably that is why earlier
    // highlights survive the delete — confirm against the Office.js docs.
    conditionalFormatCols.delete();
    conditionalFormatRows.delete();
    await context.sync();
    navAidIsOn = false;
  });
}
/**
 * Selection-changed handler: re-applies the highlight for the new selection.
 *
 * @returns {Promise<void>} Settles when the highlight has been applied.
 */
async function navAidListener() {
  // Awaited so failures reject this handler's promise instead of being lost
  // in a floating promise.
  await applyConditionalFormatting();
}
/**
 * Highlights every full column and full row that intersects the current
 * selection on the active worksheet.
 *
 * For each selected area, an always-true custom conditional format with fill
 * colour #D2F0E0 is added to the spanned columns and to the spanned rows. The
 * formats created last are stored in the shared `conditionalFormatCols` /
 * `conditionalFormatRows` variables.
 *
 * NOTE(review): with a multi-area selection, or after repeated calls, only the
 * last pair of formats stays referenced; earlier ones are no longer reachable
 * through those variables.
 *
 * @returns {Promise<void>} Settles after all formats have been queued and synced.
 */
async function applyConditionalFormatting() {
  // Excel worksheet grid limits, used to span whole columns / whole rows.
  const MAX_ROWS = 1048576;
  const MAX_COLUMNS = 16384;

  await Excel.run(async (context) => {
    const sheet = context.workbook.worksheets.getActiveWorksheet();
    const ranges = context.workbook.getSelectedRanges();
    ranges.load("address");
    await context.sync();

    // One Range proxy per selected area, with the geometry we need loaded.
    const rangeArray = ranges.address
      .split(",")
      .map((rangeStr) => sheet.getRange(rangeStr).load("columnIndex, rowIndex, rowCount, columnCount"));
    await context.sync();

    // A plain loop instead of forEach(async ...): the batch must not finish
    // before every format has been queued.
    for (const range of rangeArray) {
      const columns = sheet.getRangeByIndexes(0, range.columnIndex, MAX_ROWS, range.columnCount);
      const rows = sheet.getRangeByIndexes(range.rowIndex, 0, range.rowCount, MAX_COLUMNS);

      conditionalFormatCols = columns.conditionalFormats.add(Excel.ConditionalFormatType.custom);
      conditionalFormatCols.custom.rule.formula = "TRUE";
      conditionalFormatCols.custom.format.fill.color = "#D2F0E0";

      conditionalFormatRows = rows.conditionalFormats.add(Excel.ConditionalFormatType.custom);
      conditionalFormatRows.custom.rule.formula = "TRUE";
      conditionalFormatRows.custom.format.fill.color = "#D2F0E0";
    }
    await context.sync();
  });
}

Refactoring JavaScript, shortening variable declaration

I have the following code that works as expected:
// Fills #first and #second with the text bodies of two API responses.
(async () => {
// Both target elements are looked up once, up front.
const first = document.getElementById("first"), second = document.getElementById("second");
try {
// The two requests run one after the other; each response body is read as
// text and assigned as the element's HTML.
first.innerHTML = await (await fetch('https://myAPI.com/1')).text();
second.innerHTML = await (await fetch('https://myAPI.com/2')).text();
} catch {
// Any failure in either request marks both elements as 'Error'.
first.innerHTML = 'Error';
second.innerHTML = 'Error';
}
})();
But I am curious whether there is a way to shorten the declaration of my variables, something like this (which doesn't work):
// The asker's non-working attempt at a shorter declaration, kept as posted.
(async () => {
// NOTE(review): getElementById receives the single literal string
// "[first, second]" here, not two ids; presumably no element has that id, so
// the array destructuring has nothing iterable to unpack — confirm.
const [first, second] = document.getElementById("[first, second]");
try {
first.innerHTML = await (await fetch('https://myAPI.com/1')).text();
second.innerHTML = await (await fetch('https://myAPI.com/2')).text();
} catch {
// Any failure in either request marks both elements as 'Error'.
first.innerHTML = 'Error';
second.innerHTML = 'Error';
}
})();
Just curious as to whether I can get rid of that second appearance of 'document.getElementById' to make the line more compact.
The most concise way to do it is by using the following:
// querySelectorAll("#first, #second") returns matches in document order, not
// selector order, so destructuring its result swaps the two elements whenever
// #second precedes #first in the markup. Looking each id up explicitly keeps
// the order fixed while still naming getElementById only once.
const [first, second] = ["first", "second"].map((id) => document.getElementById(id));

Nodejs/Puppeteer How to iteratively scrape website without restarting another puppeteer instance?

I am trying to scrape a website with Puppeteer. The following code currently works, but I think it can be optimized, for one by using only a single Puppeteer instance. I don't really know how to do that, though. Can anyone help me?
This is the working but slow original:
const puppeteer = require('puppeteer');
/**
 * Launches a fresh browser, opens `url` and returns the text content of the
 * first element matched by the XPath expression.
 *
 * A new browser is started for every call (this is the slow variant); it is
 * always closed again, including when navigation or the lookup fails.
 *
 * @param {string} url - Address of the page to scrape.
 * @returns {Promise<*>} The JSON value of the element's `textContent` property.
 * @throws {Error} When no element matches the XPath expression.
 */
async function scrapeProduct(url) {
  const browser = await puppeteer.launch();
  try {
    const page = await browser.newPage();
    await page.goto(url);
    // NOTE(review): `page.$x` is kept from the original; newer Puppeteer
    // releases may no longer provide it — confirm for the installed version.
    const [el] = await page.$x('xpath of element');
    if (!el) {
      throw new Error(`No element matched the XPath expression on ${url}`);
    }
    const txt = await el.getProperty('textContent');
    return await txt.jsonValue();
  } finally {
    // Awaited so the browser process is gone before the caller continues.
    await browser.close();
  }
}
// Scrapes one page per entry of some_array, strictly one after another, and
// logs "<Name>: <scraped text>" for each.
async function something() {
// Placeholder from the original post: stands for the array's length and is
// not runnable as written.
var some_varible = the length of some array;
// NOTE(review): presumably raises the listener limit because every
// scrapeProduct() call launches its own browser — confirm.
process.setMaxListeners(some_varible);
for (var i = 0; i < some_varible; i++) {
var target = some_array[i].Name
// 'somewebsite' is a placeholder base URL; the entry's Name is appended to it.
var url = 'somewebsite' + target;
console.log(target + ": " + await scrapeProduct(url));
}
}
// Start the run; the returned promise is not awaited or caught here.
something();
This is my pathetic attempt at not using multiple instances of puppeteer: (Does not work)
const puppeteer = require('puppeteer');
/**
 * Navigates an already-open page to `url` and returns the text content of the
 * first element matched by the XPath expression.
 *
 * @param {string} url - Address of the page to scrape.
 * @param {object} page - Puppeteer page to reuse; it is not closed here.
 * @returns {Promise<*>} The JSON value of the element's `textContent` property.
 * @throws {Error} When no element matches the XPath expression.
 */
async function scrapeProduct(url, page) {
  await page.goto(url);
  const [el] = await page.$x('xpath of element');
  if (!el) {
    // Fail with a message that names the page instead of an opaque TypeError.
    throw new Error(`No element matched the XPath expression on ${url}`);
  }
  const txt = await el.getProperty('textContent');
  return txt.jsonValue();
}
// Same loop as the slow variant, but one browser and one page are created up
// front and handed to scrapeProduct() for every URL.
async function something() {
// Placeholder from the original post: stands for the array's length and is
// not runnable as written.
var some_varible = the length of some array;
process.setMaxListeners(some_varible);
const browser = await puppeteer.launch();
const page = await browser.newPage();
for (var i = 0; i < some_varible; i++) {
var target = some_array[i].Name
// 'somewebsite' is a placeholder base URL; the entry's Name is appended to it.
var url = 'somewebsite' + target;
console.log(target + ": " + await scrapeProduct(url, page));
}
// NOTE(review): close() is not awaited, and it is skipped entirely if any
// iteration above throws.
browser.close();
}
// Start the run; the returned promise is not awaited or caught here.
something();

Looping through a set of urls in Puppeteer

How would I scrape content from multiple urls using Puppeteer?
I've created a loop, but I'm only seeing the results for the first url.
I suspect it's something to do with where I declare the results variable, but I've had no luck trying, does anyone know how to do this?
const puppeteer = require('puppeteer');
/**
 * Visits the product URLs with a single Puppeteer page and collects, per size
 * label, the product name, the size and a stock-level string.
 *
 * As posted, the browser is closed and the promise resolved inside the loop
 * body, so only the first URL's results are ever delivered — this is the
 * behaviour the question asks about.
 */
function run() {
return new Promise(async (resolve, reject) => {
try {
const browser = await puppeteer.launch();
const page = await browser.newPage();
const urls = ["https://www.marksandspencer.com/high-neck-long-sleeve-blouse/p/p60260040?image=SD_01_T43_5168_HD_X_EC_90&color=LIGHTDENIM&prevPage=plp", "https://www.marksandspencer.com/pure-cotton-printed-short-sleeve-t-shirt/p/p60263529?image=SD_01_T41_8030Z_Z4_X_EC_90&color=WHITEMIX&prevPage=plp"];
for (let i = 0; i < urls.length; i++) {
const url = urls[i];
await page.goto(url);
// Everything inside evaluate() runs in the page, not in Node.
let products = await page.evaluate(() => {
let product = document.querySelector('h1[itemprop=name]').innerText;
let results = [];
// One <label> per size cell of the size grid.
let items = document.querySelectorAll('[data-ttip-id=sizeGridTooltip] tbody tr td label');
items.forEach((element) => {
// The label's `for` attribute is used as the size value.
let size = element.getAttribute('for');
let stockLevel = "";
// Stock state is read from the CSS classes of the enclosing <td>.
let nearest_td = element.closest('td');
if (nearest_td.classList.contains('low-stock')) {
stockLevel = "Low stock"
} else if (nearest_td.classList.contains('out-of-stock')) {
stockLevel = "Out of stock"
} else {
stockLevel = "In stock"
}
results.push({
product: product,
size: size,
stock: stockLevel
})
});
return results
})
// These two statements sit inside the for loop: the first iteration closes
// the browser and resolves, so later URLs are never reported.
browser.close();
return resolve(products);
}
} catch (e) {
return reject(e);
}
})
}
// Print the resolved products, or the error if the run rejects.
run().then(console.log).catch(console.error);
These lines are inside your for loop:
// Quoted from run() in the question: both statements execute during the
// first loop iteration.
browser.close();
return resolve(products);
So as part of the first iteration, you close the browser and return the function. You should move this out of your for loop and store products inside an array like this:
const urls = /* ... */;
const productsList = [];
for (let i = 0; i < urls.length; i++) {
const url = urls[i];
await page.goto(url);
let products = await page.evaluate(/* ... */);
productsList.push(products);
}
browser.close();
return resolve(productsList); // resolve with an array containing the aggregated products
In case you are looking for a more elegant solution (for crawling the pages in parallel), you might want to have a look at the library puppeteer-cluster (disclaimer: I'm the author).

Categories

Resources