Cheerio selector returns blank array - javascript

I am trying to get the names of the matches in the "Starting soon" section of this website: https://favbet.com/en/bets/
This is my code:
var request = require('request'),
    cheerio = require('cheerio'),
    // Part of the same `var` list so it is a real declaration, not an implicit global.
    matches = [];

// Fetch the bets page and collect the text of every entry in the
// "Starting soon" list, then print the collected names.
request('https://favbet.com/en/bets/', function (err, resp, body) {
  if (!err && resp.statusCode === 200) {
    var $ = cheerio.load(body);
    // Inside .each() `this` is the raw DOM node, which has no .text();
    // wrap it with $() to get the Cheerio API.
    // NOTE(review): if this section is filled in by client-side script, the
    // downloaded HTML will not contain these items and the array stays empty - confirm.
    $('#startsoon').find('li.col20').each(function () {
      matches.push($(this).text());
    });
    console.log(matches);
  }
});
console.log prints an empty array. I don't quite understand how the deep nesting works in this case.

Related

web scraping question: trying to store a specific value of my scrape into a var

const cheerio = require("cheerio");
const request = require("request");

// Download the champion page and print the inner HTML of the element that
// sits two siblings before `.footer-attr`.
request("https://champion.gg/champion/Nautilus/Support?league=", (error, response, html) => {
  // Nothing to do when the request failed or did not answer 200.
  if (error || response.statusCode != 200) {
    return;
  }

  const $ = cheerio.load(html);
  const footer = $('.footer-attr');
  const scriptHolder = footer.prev().prev();
  console.log(scriptHolder.html());
});
This is what I'm currently using and it returns all this
https://i.imgur.com/l1pOQ6V.png
I'd like to save the value of "physicalDmg" "magicDmg" and "trueDmg" into 3 variables but have no idea how to go about this.
If someone could help me with this that'd be great!

Javascript: Parsing HTML table with Cheerio

I am trying to develop a web scraper using Cheerio to parse an HTML table and output the results in a CSV file, unfortunately the code won't return anything. My code is the following:
var request = require("request");
var cheerio = require("cheerio");
var fs = require("fs");
var url = "https://stat.epa.gov.tw/";

// Scrape table.tableAQI, keep the non-empty row texts and write them
// (JSON-encoded) to output2.csv.
request(url, function (err, response, html) {
  if (err) {
    // Report the failure instead of silently producing no output.
    console.log(err);
    return;
  }

  var $ = cheerio.load(html);
  var items = [];

  // NOTE(review): the grandchildren of the table are rows only when the markup
  // has an explicit <thead>/<tbody>; otherwise they may be cells, and
  // find("td") on a cell matches nothing - verify against the page source.
  $('table.tableAQI').children().children().each(function (index, node) {
    // Use the node handed to the callback rather than re-running the whole
    // selector and .eq(index) on every pass.
    var result = $(node).find("td").text();
    if (result !== "") {
      items.push(result);
    }
  });

  fs.writeFile("output2.csv", JSON.stringify(items, null, 4), function (err) {
    if (err) {
      console.log(err);
    } else {
      console.log("Data has been written");
    }
  });
  console.log(items);
});
The website provides two tables. I tried the same code on the other table ('table.aqicolor') and it worked just fine, so I am not sure what is wrong.
I appreciate your support.

How to add into array the url that contains specific String?

I am trying to get all values of element
var request = require('request');
var cheerio = require('cheerio');
var url = "https://www.mismarcadores.com";

// Download the page and collect the href of every <a> element into addUrl.
request(url, function (err, resp, body) {
  if (err) throw err;
  var $ = cheerio.load(body);
  var addUrl = [];
  $('a').each(function (i, element) {
    var a = $(this);
    var href = a.attr('href');
    addUrl.push(href);
  });
  // The collected links live in addUrl; there is no variable named `array`.
  console.log(addUrl[0]);
});
I have this code that adds the links to an array called addUrl. This works perfectly, but now I want to add a link to the array only if its href contains the word 'baloncesto'.
Good example : https://www.mismarcadores.com/baloncesto/alemania/
This URL , is good but
Bad example : https://www.mismarcadores.com/golf/
This is wrong.
I am developing this with Node.js, but this part is plain JavaScript and I don't know how to do it.
Could anyone help me?
Please try like this:
var request = require('request');
var cheerio = require('cheerio');
var url = "https://www.mismarcadores.com";
var filterStr = 'baloncesto';

// Download the page and keep only the links whose href contains filterStr.
request(url, function (err, resp, body) {
  if (err) throw err;
  var $ = cheerio.load(body);
  var addUrl = [];
  $('a').each(function (i, element) {
    var href = $(this).attr('href');
    // attr('href') is undefined for anchors without an href attribute;
    // calling .includes on it would throw, so check it first.
    if (href && href.includes(filterStr)) {
      addUrl.push(href);
    }
  });
  console.log(addUrl);
});
Okay, so what you're looking for is a pattern match - if href contains the string baloncesto. I suspect that this SO thread will be helpful to you - you're basically looking to nest this line -
addUrl.push(href);
- in an if statement, like
// href is undefined for anchors that have no href attribute, so check it
// before calling a string method on it.
if (href && href.includes('baloncesto')) {
  addUrl.push(href);
}
...but definitely look at that other answer for reference, in case you're using a version of JS that's older and not compatible with ES6.
you can try this
// JavaScript strings have no .contains(); the standard method is .includes().
// href is undefined for anchors without an href attribute, so check it first.
if (href && href.includes('baloncesto')) {
  addUrl.push(href);
}
Try this
var request = require('request');
var cheerio = require('cheerio');
var url = "https://www.mismarcadores.com";

// Download the page and keep the links whose (URL-decoded) href contains
// the word 'baloncesto'.
request(url, function (err, resp, body) {
  if (err) throw err;
  var $ = cheerio.load(body);
  var addUrl = [];
  $('a').each(function (i, element) {
    var href = $(this).attr('href');
    if (!href) {
      // Anchor without an href attribute: nothing to test.
      return;
    }

    var decoded;
    try {
      decoded = decodeURIComponent(href);
    } catch (e) {
      // decodeURIComponent throws URIError on malformed escapes (e.g. a lone
      // '%'); fall back to the raw value rather than aborting the scrape.
      decoded = href;
    }

    // Strings have .includes(), not .contains().
    if (decoded.includes('baloncesto')) {
      addUrl.push(href);
    } else {
      //do something else
    }
  });
  // The collected links live in addUrl; there is no variable named `array`.
  console.log(addUrl[0]);
});

Trying to scrape restaurant menu data using node.js / cheerio - but returning blank array

I am trying to scrape some menu information from the following restaurant: Red Cow url: "http://redcowmn.com/50th-street-menu/"
I want the menu item name and the price; for now I am troubleshooting just getting the name information. Can someone help me troubleshoot my code? Thanks.
I have tried the following code:
var request = require("request");
var cheerio = require("cheerio");
var fs = require("fs");
var url = "http://redcowmn.com/50th-street-menu/";

// Collect the menu item names found inside the menu block and save them
// (JSON-encoded) to output1.txt.
request(url, function (err, response, html) {
  if (err) {
    // Report the failure instead of silently producing no output.
    console.log(err);
    return;
  }

  var $ = cheerio.load(html);
  var items = [];

  // Visit each title element itself; evaluating one fixed .eq(0)/.eq(5) path
  // inside the loop would inspect the same node on every pass.
  // NOTE(review): assumes item titles carry the class "menu-item-title" (an id
  // cannot legitimately repeat once per item) - confirm against the page markup.
  $("#block-yui_3_17_2_9_1423750151911_47600")
    .find(".menu-item-title")
    .each(function (index, el) {
      var result = $(el).text().trim();
      if (result !== "") {
        items.push(result);
      }
    });

  fs.writeFile("output1.txt", JSON.stringify(items, null, 4), function (err) {
    if (err) {
      console.log(err);
    } else {
      console.log("Data has been added to a file.");
    }
  });
  console.log(items);
});

Run function in series with dynamic arguments in nodeJs

Goal: (solved: updated working code below)
Create an array of urls
Create a function that accepts a single url, makes a request to that url, and writes some data to file
Run above function in series multiple times for each url in the array.
Node Dependencies: Request, Cheerio, async, fs
Here is what I have so far:
// Load dependencies: request (HTTP), cheerio (HTML querying), fs, and async
// (used below for eachSeries).
var request = require("request"),
cheerio = require("cheerio"),
fs = require('fs'),
async = require('async'),
// One write stream on ./model/data.txt, shared by getUrlData (writes) and
// endStream (closes it).
wstream = fs.createWriteStream('./model/data.txt');
// Build the list of URLs to visit: http://www.test.com/0 through http://www.test.com/9
var arr = Array.from({ length: 10 }, function (_, n) {
  return 'http://www.test.com/' + n;
});
/**
 * Fetch one URL, write the inner HTML of its first <h1> to the shared write
 * stream, and signal completion.
 *
 * @param {string} url - address to request
 * @param {function} callback - invoked exactly once: with the request error
 *   when the request failed, otherwise with no error (a non-200 reply is
 *   skipped, not treated as a failure)
 */
function getUrlData(url, callback) {
  request(url, function (error, response, body) {
    if (error || response.statusCode !== 200) {
      // Always report back; without this the series never advances and the
      // stream is never closed. `error` is null for a non-200 reply.
      return callback(error);
    }
    var $ = cheerio.load(body);
    var data = $('h1').html();
    // .html() yields null when nothing matched; a stream rejects null chunks.
    if (data !== null) {
      wstream.write(data);
    }
    callback();
  });
}
/**
 * Final step of the series: close the shared write stream.
 *
 * @param {Error} [err] - error handed over by the caller, if any; it is
 *   logged so a failed run does not pass unnoticed
 */
function endStream(err) {
  if (err) {
    console.error(err);
  }
  wstream.end();
}
// Updated working version: pass each URL in arr to getUrlData via
// async.eachSeries, with endStream as the final callback.
async.eachSeries(arr,getUrlData,endStream);
Since you want getUrlData to be executed sequentially, you'll need to add a callback to getUrlData:
/**
 * Fetch one URL, write the inner HTML of its first <h1> to the shared write
 * stream, and signal completion.
 *
 * @param {string} url - address to request
 * @param {function} callback - invoked exactly once: with the request error
 *   when the request failed, otherwise with no error (a non-200 reply is
 *   skipped, not treated as a failure)
 */
function getUrlData(url, callback) {
  request(url, function (error, response, body) {
    if (error || response.statusCode !== 200) {
      // `error` is null for a non-200 reply, so the series simply moves on.
      return callback(error);
    }
    var $ = cheerio.load(body);
    var data = $('h1').html();
    // .html() yields null when nothing matched; a stream rejects null chunks.
    if (data !== null) {
      wstream.write(data);
    }
    // Single completion signal: calling back a second time after a success
    // would advance (or break) the series twice for one URL.
    callback();
  });
}
Then use eachSeries:
// Pass each URL in arr to getUrlData via async.eachSeries, with endStream as
// the final callback.
async.eachSeries(arr, getUrlData, endStream);

Categories

Resources